2
/*-------------------------------------------------------------*/
3
/*--- Compression machinery (not incl block sorting) ---*/
5
/*-------------------------------------------------------------*/
7
/* ------------------------------------------------------------------
8
This file is part of bzip2/libbzip2, a program and library for
9
lossless, block-sorting data compression.
11
bzip2/libbzip2 version 1.0.4 of 20 December 2006
12
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
14
Please read the WARNING, DISCLAIMER and PATENTS sections in the
17
This program is released under the terms of the license contained
19
------------------------------------------------------------------ */
23
0.9.0 -- original version.
24
0.9.0a/b -- no changes in this file.
25
0.9.0c -- changed setting of nGroups in sendMTFValues()
26
so as to do a bit better on small files
29
#include "bzlib_private.h"
32
/*---------------------------------------------------*/
33
/*--- Bit stream I/O ---*/
34
/*---------------------------------------------------*/
36
/*---------------------------------------------------*/
37
void BZ2_bsInitWrite ( EState* s )
44
/*---------------------------------------------------*/
46
void bsFinishWrite ( EState* s )
48
while (s->bsLive > 0) {
49
s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
57
/*---------------------------------------------------*/
60
while (s->bsLive >= 8) { \
62
= (UChar)(s->bsBuff >> 24); \
70
/*---------------------------------------------------*/
73
void bsW ( EState* s, Int32 n, UInt32 v )
76
s->bsBuff |= (v << (32 - s->bsLive - n));
81
/*---------------------------------------------------*/
83
void bsPutUInt32 ( EState* s, UInt32 u )
85
bsW ( s, 8, (u >> 24) & 0xffL );
86
bsW ( s, 8, (u >> 16) & 0xffL );
87
bsW ( s, 8, (u >> 8) & 0xffL );
88
bsW ( s, 8, u & 0xffL );
92
/*---------------------------------------------------*/
94
void bsPutUChar ( EState* s, UChar c )
96
bsW( s, 8, (UInt32)c );
100
/*---------------------------------------------------*/
101
/*--- The back end proper ---*/
102
/*---------------------------------------------------*/
104
/*---------------------------------------------------*/
106
void makeMaps_e ( EState* s )
110
for (i = 0; i < 256; i++)
112
s->unseqToSeq[i] = s->nInUse;
118
/*---------------------------------------------------*/
120
void generateMTFValues ( EState* s )
129
After sorting (eg, here),
130
s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
132
((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
133
holds the original block data.
135
The first thing to do is generate the MTF values,
137
((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
138
Because there are strictly fewer or equal MTF values
139
than block values, ptr values in this area are overwritten
140
with MTF values only when they are no longer needed.
142
The final compressed bitstream is generated into the
144
(UChar*) (&((UChar*)s->arr2)[s->nblock])
146
These storage aliases are set up in bzCompressInit(),
147
except for the last one, which is arranged in
150
UInt32* ptr = s->ptr;
151
UChar* block = s->block;
152
UInt16* mtfv = s->mtfv;
157
for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
161
for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
163
for (i = 0; i < s->nblock; i++) {
165
AssertD ( wr <= i, "generateMTFValues(1)" );
166
j = ptr[i]-1; if (j < 0) j += s->nblock;
167
ll_i = s->unseqToSeq[block[j]];
168
AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
178
mtfv[wr] = BZ_RUNB; wr++;
179
s->mtfFreq[BZ_RUNB]++;
181
mtfv[wr] = BZ_RUNA; wr++;
182
s->mtfFreq[BZ_RUNA]++;
184
if (zPend < 2) break;
185
zPend = (zPend - 2) / 2;
191
register UChar* ryy_j;
192
register UChar rll_i;
197
while ( rll_i != rtmp ) {
198
register UChar rtmp2;
205
j = ryy_j - &(yy[0]);
206
mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
216
mtfv[wr] = BZ_RUNB; wr++;
217
s->mtfFreq[BZ_RUNB]++;
219
mtfv[wr] = BZ_RUNA; wr++;
220
s->mtfFreq[BZ_RUNA]++;
222
if (zPend < 2) break;
223
zPend = (zPend - 2) / 2;
228
mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
234
/*---------------------------------------------------*/
235
#define BZ_LESSER_ICOST 0
236
#define BZ_GREATER_ICOST 15
239
void sendMTFValues ( EState* s )
241
Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
242
Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
243
Int32 nGroups, nBytes;
246
UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
247
is a global since the decoder also needs it.
249
Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
250
Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
251
are also globals only used in this proc.
252
Made global to keep stack frame size small.
256
UInt16 cost[BZ_N_GROUPS];
257
Int32 fave[BZ_N_GROUPS];
259
UInt16* mtfv = s->mtfv;
261
if (s->verbosity >= 3)
262
VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
263
"%d+2 syms in use\n",
264
s->nblock, s->nMTF, s->nInUse );
266
alphaSize = s->nInUse+2;
267
for (t = 0; t < BZ_N_GROUPS; t++)
268
for (v = 0; v < alphaSize; v++)
269
s->len[t][v] = BZ_GREATER_ICOST;
271
/*--- Decide how many coding tables to use ---*/
272
AssertH ( s->nMTF > 0, 3001 );
273
if (s->nMTF < 200) nGroups = 2; else
274
if (s->nMTF < 600) nGroups = 3; else
275
if (s->nMTF < 1200) nGroups = 4; else
276
if (s->nMTF < 2400) nGroups = 5; else
279
/*--- Generate an initial set of coding tables ---*/
281
Int32 nPart, remF, tFreq, aFreq;
287
tFreq = remF / nPart;
290
while (aFreq < tFreq && ge < alphaSize-1) {
292
aFreq += s->mtfFreq[ge];
296
&& nPart != nGroups && nPart != 1
297
&& ((nGroups-nPart) % 2 == 1)) {
298
aFreq -= s->mtfFreq[ge];
302
if (s->verbosity >= 3)
303
VPrintf5( " initial group %d, [%d .. %d], "
304
"has %d syms (%4.1f%%)\n",
305
nPart, gs, ge, aFreq,
306
(100.0 * (float)aFreq) / (float)(s->nMTF) );
308
for (v = 0; v < alphaSize; v++)
309
if (v >= gs && v <= ge)
310
s->len[nPart-1][v] = BZ_LESSER_ICOST; else
311
s->len[nPart-1][v] = BZ_GREATER_ICOST;
320
Iterate up to BZ_N_ITERS times to improve the tables.
322
for (iter = 0; iter < BZ_N_ITERS; iter++) {
324
for (t = 0; t < nGroups; t++) fave[t] = 0;
326
for (t = 0; t < nGroups; t++)
327
for (v = 0; v < alphaSize; v++)
331
Set up an auxiliary length table which is used to fast-track
332
the common case (nGroups == 6).
335
for (v = 0; v < alphaSize; v++) {
336
s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
337
s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
338
s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
347
/*--- Set group start & end marks. --*/
348
if (gs >= s->nMTF) break;
349
ge = gs + BZ_G_SIZE - 1;
350
if (ge >= s->nMTF) ge = s->nMTF-1;
353
Calculate the cost of this group as coded
354
by each of the coding tables.
356
for (t = 0; t < nGroups; t++) cost[t] = 0;
358
if (nGroups == 6 && 50 == ge-gs+1) {
359
/*--- fast track the common case ---*/
360
register UInt32 cost01, cost23, cost45;
362
cost01 = cost23 = cost45 = 0;
364
# define BZ_ITER(nn) \
365
icv = mtfv[gs+(nn)]; \
366
cost01 += s->len_pack[icv][0]; \
367
cost23 += s->len_pack[icv][1]; \
368
cost45 += s->len_pack[icv][2]; \
370
BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
371
BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
372
BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
373
BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
374
BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
375
BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
376
BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
377
BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
378
BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
379
BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
383
cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
384
cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
385
cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
388
/*--- slow version which correctly handles all situations ---*/
389
for (i = gs; i <= ge; i++) {
390
UInt16 icv = mtfv[i];
391
for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
396
Find the coding table which is best for this group,
397
and record its identity in the selector table.
399
bc = 999999999; bt = -1;
400
for (t = 0; t < nGroups; t++)
401
if (cost[t] < bc) { bc = cost[t]; bt = t; };
404
s->selector[nSelectors] = bt;
408
Increment the symbol frequencies for the selected table.
410
if (nGroups == 6 && 50 == ge-gs+1) {
411
/*--- fast track the common case ---*/
413
# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
415
BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
416
BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
417
BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
418
BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
419
BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
420
BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
421
BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
422
BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
423
BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
424
BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
429
/*--- slow version which correctly handles all situations ---*/
430
for (i = gs; i <= ge; i++)
431
s->rfreq[bt][ mtfv[i] ]++;
436
if (s->verbosity >= 3) {
437
VPrintf2 ( " pass %d: size is %d, grp uses are ",
439
for (t = 0; t < nGroups; t++)
440
VPrintf1 ( "%d ", fave[t] );
445
Recompute the tables based on the accumulated frequencies.
447
/* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
448
comment in huffman.c for details. */
449
for (t = 0; t < nGroups; t++)
450
BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
451
alphaSize, 17 /*20*/ );
455
AssertH( nGroups < 8, 3002 );
456
AssertH( nSelectors < 32768 &&
457
nSelectors <= (2 + (900000 / BZ_G_SIZE)),
461
/*--- Compute MTF values for the selectors. ---*/
463
UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
464
for (i = 0; i < nGroups; i++) pos[i] = i;
465
for (i = 0; i < nSelectors; i++) {
466
ll_i = s->selector[i];
469
while ( ll_i != tmp ) {
476
s->selectorMtf[i] = j;
480
/*--- Assign actual codes for the tables. --*/
481
for (t = 0; t < nGroups; t++) {
484
for (i = 0; i < alphaSize; i++) {
485
if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
486
if (s->len[t][i] < minLen) minLen = s->len[t][i];
488
AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
489
AssertH ( !(minLen < 1), 3005 );
490
BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
491
minLen, maxLen, alphaSize );
494
/*--- Transmit the mapping table. ---*/
497
for (i = 0; i < 16; i++) {
499
for (j = 0; j < 16; j++)
500
if (s->inUse[i * 16 + j]) inUse16[i] = True;
504
for (i = 0; i < 16; i++)
505
if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
507
for (i = 0; i < 16; i++)
509
for (j = 0; j < 16; j++) {
510
if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
513
if (s->verbosity >= 3)
514
VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
517
/*--- Now the selectors. ---*/
519
bsW ( s, 3, nGroups );
520
bsW ( s, 15, nSelectors );
521
for (i = 0; i < nSelectors; i++) {
522
for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
525
if (s->verbosity >= 3)
526
VPrintf1( "selectors %d, ", s->numZ-nBytes );
528
/*--- Now the coding tables. ---*/
531
for (t = 0; t < nGroups; t++) {
532
Int32 curr = s->len[t][0];
534
for (i = 0; i < alphaSize; i++) {
535
while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
536
while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
541
if (s->verbosity >= 3)
542
VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
544
/*--- And finally, the block data proper ---*/
549
if (gs >= s->nMTF) break;
550
ge = gs + BZ_G_SIZE - 1;
551
if (ge >= s->nMTF) ge = s->nMTF-1;
552
AssertH ( s->selector[selCtr] < nGroups, 3006 );
554
if (nGroups == 6 && 50 == ge-gs+1) {
555
/*--- fast track the common case ---*/
557
UChar* s_len_sel_selCtr
558
= &(s->len[s->selector[selCtr]][0]);
559
Int32* s_code_sel_selCtr
560
= &(s->code[s->selector[selCtr]][0]);
562
# define BZ_ITAH(nn) \
563
mtfv_i = mtfv[gs+(nn)]; \
565
s_len_sel_selCtr[mtfv_i], \
566
s_code_sel_selCtr[mtfv_i] )
568
BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
569
BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
570
BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
571
BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
572
BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
573
BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
574
BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
575
BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
576
BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
577
BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
582
/*--- slow version which correctly handles all situations ---*/
583
for (i = gs; i <= ge; i++) {
585
s->len [s->selector[selCtr]] [mtfv[i]],
586
s->code [s->selector[selCtr]] [mtfv[i]] );
594
AssertH( selCtr == nSelectors, 3007 );
596
if (s->verbosity >= 3)
597
VPrintf1( "codes %d\n", s->numZ-nBytes );
601
/*---------------------------------------------------*/
602
void BZ2_compressBlock ( EState* s, Bool is_last_block )
606
BZ_FINALISE_CRC ( s->blockCRC );
607
s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
608
s->combinedCRC ^= s->blockCRC;
609
if (s->blockNo > 1) s->numZ = 0;
611
if (s->verbosity >= 2)
612
VPrintf4( " block %d: crc = 0x%08x, "
613
"combined CRC = 0x%08x, size = %d\n",
614
s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
619
s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
621
/*-- If this is the first block, create the stream header. --*/
622
if (s->blockNo == 1) {
623
BZ2_bsInitWrite ( s );
624
bsPutUChar ( s, BZ_HDR_B );
625
bsPutUChar ( s, BZ_HDR_Z );
626
bsPutUChar ( s, BZ_HDR_h );
627
bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
632
bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
633
bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
634
bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
636
/*-- Now the block's CRC, so it is in a known place. --*/
637
bsPutUInt32 ( s, s->blockCRC );
640
Now a single bit indicating (non-)randomisation.
641
As of version 0.9.5, we use a better sorting algorithm
642
which makes randomisation unnecessary. So always set
643
the randomised bit to 'no'. Of course, the decoder
644
still needs to be able to handle randomised blocks
645
so as to maintain backwards compatibility with
646
older versions of bzip2.
650
bsW ( s, 24, s->origPtr );
651
generateMTFValues ( s );
656
/*-- If this is the last block, add the stream trailer. --*/
659
bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
660
bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
661
bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
662
bsPutUInt32 ( s, s->combinedCRC );
663
if (s->verbosity >= 2)
664
VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC );
670
/*-------------------------------------------------------------*/
671
/*--- end compress.c ---*/
672
/*-------------------------------------------------------------*/