100
throw InputExc ("Error in Huffman-encoded data (invalid code).");
108
throw InputExc ("Error in Huffman-encoded data "
105
114
invalidTableSize ()
107
throw InputExc ("Error in Huffman-encoded data (invalid code table size).");
116
throw InputExc ("Error in Huffman-encoded data "
117
"(invalid code table size).");
122
unexpectedEndOfTable ()
124
throw InputExc ("Error in Huffman-encoded data "
125
"(unexpected end of code table data).");
132
throw InputExc ("Error in Huffman-encoded data "
133
"(code table is longer than expected).");
140
throw InputExc ("Error in Huffman-encoded data "
141
"(invalid code table entry).");
173
207
hufCanonicalCodeTable (Int64 hcode[HUF_ENCSIZE])
177
for (int i = 0; i < 58; ++i)
212
// For each i from 0 through 58, count the
213
// number of different codes of length i, and
214
// store the count in n[i].
217
for (int i = 0; i <= 58; ++i)
180
220
for (int i = 0; i < HUF_ENCSIZE; ++i)
181
221
n[hcode[i]] += 1;
224
// For each i from 58 through 1, compute the
225
// numerically lowest code with length i, and
226
// store that code in n[i].
185
for (int i = 57; i > 0; --i)
231
for (int i = 58; i > 0; --i)
187
233
Int64 nc = ((c + n[i]) >> 1);
239
// hcode[i] contains the length, l, of the
240
// code for symbol i. Assign the next available
241
// code of length l to the symbol and store both
242
// l and the code in hcode[i].
192
245
for (int i = 0; i < HUF_ENCSIZE; ++i)
194
247
int l = hcode[i];
222
275
int* iM) // o: max frq index
225
// Find min/max indices in frq, mark leaves
228
static const int LEAF = 0x80000;
229
static const int INDEX = 0x7ffff;
231
// Temporary table dependencies,
232
// and heap for quickly finding min frq value
278
// This function assumes that when it is called, array frq
279
// indicates the frequency of all possible symbols in the data
280
// that are to be Huffman-encoded. (frq[i] contains the number
281
// of occurrences of symbol i in the data.)
283
// The loop below does three things:
285
// 1) Finds the minimum and maximum indices that point
286
// to non-zero entries in frq:
288
// frq[im] != 0, and frq[i] == 0 for all i < im
289
// frq[iM] != 0, and frq[i] == 0 for all i > iM
291
// 2) Fills array fHeap with pointers to all non-zero
294
// 3) Initializes array hlink such that hlink[i] == i
295
// for all array entries.
234
298
AutoArray <int, HUF_ENCSIZE> hlink;
235
299
AutoArray <Int64 *, HUF_ENCSIZE> fHeap;
239
303
while (!frq[*im])
244
308
for (int i = *im; i < HUF_ENCSIZE; i++)
250
314
fHeap[nf] = &frq[i];
256
(*iM)++; // add entry for run-length code
321
// Add a pseudo-symbol, with a frequency count of 1, to frq;
322
// adjust the fHeap and hlink array accordingly. Function
323
// hufEncode() uses the pseudo-symbol for run-length encoding.
258
328
fHeap[nf] = &frq[*iM];
332
// Build an array, scode, such that scode[i] contains the number
333
// of bits assigned to symbol i. Conceptually this is done by
334
// constructing a tree whose leaves are the symbols with non-zero
337
// Make a heap that contains all symbols with a non-zero frequency,
338
// with the least frequent symbol on top.
340
// Repeat until only one symbol is left on the heap:
342
// Take the two least frequent symbols off the top of the heap.
343
// Create a new node that has first two nodes as children, and
344
// whose frequency is the sum of the frequencies of the first
345
// two nodes. Put the new node back into the heap.
347
// The last node left on the heap is the root of the tree. For each
348
// leaf node, the distance between the root and the leaf is the length
349
// of the code for the corresponding symbol.
351
// The loop below doesn't actually build the tree; instead we compute
352
// the distances of the leaves from the root on the fly. When a new
353
// node is added to the heap, then that node's descendants are linked
354
// into a single linear list that starts at the new node, and the code
355
// lengths of the descendants (that is, their distance from the root
356
// of the tree) are incremented by one.
261
359
make_heap (&fHeap[0], &fHeap[nf], FHeapCompare());
264
// Init scode & loop on all values
268
// Temporary encoding table
271
361
AutoArray <Int64, HUF_ENCSIZE> scode;
273
362
memset (scode, 0, sizeof (Int64) * HUF_ENCSIZE);
290
379
frq[m ] += frq[mm];
291
380
push_heap (&fHeap[0], &fHeap[nf], FHeapCompare());
293
hlink[mm] &= ~LEAF; // unmark min value
296
// Add 1 bit to all lower list codes
299
for (int j = m; true; j = hlink[j] & INDEX)
383
// The entries in scode are linked into lists with the
384
// entries in hlink serving as "next" pointers and with
385
// the end of a list marked by hlink[j] == j.
387
// Traverse the lists that start at scode[m] and scode[mm].
388
// For each element visited, increment the length of the
389
// corresponding code by one bit. (If we visit scode[j]
390
// during the traversal, then the code for symbol j becomes
393
// Merge the lists that start at scode[m] and scode[mm]
394
// into a single list that starts at scode[m].
398
// Add a bit to all codes in the first list.
401
for (int j = m; true; j = hlink[j])
303
405
assert (scode[j] <= 58);
305
if ((hlink[j] & INDEX) == j)
308
// Merge upper & lower lists
410
// Merge the two lists.
318
// Add 1 bit to all lower list codes
419
// Add a bit to all codes in the second list
321
for (int j = mm; true; j = hlink[j] & INDEX)
422
for (int j = mm; true; j = hlink[j])
325
426
assert (scode[j] <= 58);
327
if ((hlink[j] & INDEX) == j)
434
// Build a canonical Huffman code table, replacing the code
435
// lengths in scode with (code, code length) pairs. Copy the
436
// code table from scode into frq.
332
439
hufCanonicalCodeTable (scode);
334
memcpy (frq, scode, sizeof (Int64) * HUF_ENCSIZE); // overwrite frq
440
memcpy (frq, scode, sizeof (Int64) * HUF_ENCSIZE);
416
522
hufUnpackEncTable
417
523
(const char** pcode, // io: ptr to packed table (updated)
524
int ni, // i : input size (in bytes)
418
525
int im, // i : min hcode index
419
526
int iM, // i : max hcode index
420
527
Int64* hcode) // o: encoding table [HUF_ENCSIZE]
428
535
for (; im <= iM; im++)
538
unexpectedEndOfTable();
430
540
Int64 l = hcode[im] = getBits (6, c, lc, p); // code length
432
542
if (l == (Int64) LONG_ZEROCODE_RUN)
545
unexpectedEndOfTable();
434
547
int zerun = getBits (8, c, lc, p) + SHORTEST_LONG_RUN;
549
if (im + zerun > iM + 1)
486
605
Int64 c = hufCode (hcode[im]);
487
606
int l = hufLength (hcode[im]);
611
// Error: c is supposed to be an l-bit code,
612
// but c contains a value that is greater
613
// than the largest l-bit number.
489
619
if (l > HUF_DECBITS)
893
1044
if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE)
894
1045
invalidTableSize();
1047
const char *ptr = compressed + 20;
898
1049
AutoArray <Int64, HUF_ENCSIZE> freq;
899
1050
AutoArray <HufDec, HUF_DECSIZE> hdec;
901
hufUnpackEncTable (&compressed, im, iM, freq);
1052
hufUnpackEncTable (&ptr, nCompressed - (ptr - compressed), im, iM, freq);
1056
if (nBits > 8 * (nCompressed - (ptr - compressed)))
905
1059
hufBuildDecTable (freq, im, iM, hdec);
906
hufDecode (freq, hdec, compressed, nBits, iM, nRaw, raw);
1060
hufDecode (freq, hdec, ptr, nBits, iM, nRaw, raw);