2
* $Id: encode.c,v 1.7 2004/05/31 16:08:41 andrew_belov Exp $
3
* ---------------------------------------------------------------------------
4
* The data compression procedures are located in this module.
11
#include <dos.h> /* Weird, eh? */
14
DEBUGHDR(__FILE__) /* Debug information block */
18
unsigned short *c_freq;
19
unsigned short FAR *c_code;
20
unsigned short FAR *heap;
21
unsigned short len_cnt[17];
22
unsigned short p_freq[2*NP-1];
23
unsigned short pt_code[NPT];
25
unsigned char FAR *buf;
26
unsigned char output_mask;
27
unsigned short output_pos;
29
unsigned short t_freq[2*NT-1];
31
unsigned short FAR *sortptr;
34
unsigned short fpcount;
37
unsigned short treesize;
38
unsigned char *tree=NULL;
39
unsigned short numchars;
40
unsigned short dicsiz_cur;
41
unsigned short dicpos;
42
unsigned short tc_passes;
44
unsigned int dic_alloc;
45
unsigned long encoded_bytes;
47
/* Inline functions */
50
putbits(c_len[c], c_code[c])
62
putbits(pt_len[qc], pt_code[qc]); \
67
/* Bitwise output routine */
69
void putbits(int n_c, unsigned short n_x)
79
mov cl, byte ptr bitcount
86
mov byte ptr bitcount, cl
99
mov ah, byte ptr bitbuf+1
108
mov ah, byte ptr bitbuf
121
mov al, byte ptr bitbuf
163
mov byte ptr bitcount, cl
187
out_buffer[bt++]=bitbuf>>8;
202
out_buffer[bt++]=bitbuf;
211
/* Quick fill routine */
213
static void fill_fpbuf()
228
for(i=0; i<fpcount; i++)
233
/* Reduces the number of bits for smaller files */
241
/* Reads data from the source file or a memory region */
244
int fetch_uncomp(char *dest, int n)
246
unsigned int fetch_size;
249
return(fread_crc(dest, n, encstream));
254
fetch_size=min((unsigned int)n, encmem_remain);
255
far_memmove((char FAR *)dest, encblock_ptr, fetch_size);
256
crc32_for_block(dest, fetch_size);
257
encmem_remain-=fetch_size;
258
encblock_ptr+=fetch_size;
264
/* Fills the length table depending on the leaf depth (call with i==root) */
266
static void count_len(int i)
271
len_cnt[(depth<16)?depth:16]++;
281
/* Makes length counter table */
283
static void NEAR make_len(int root)
293
cum+=len_cnt[i]<<(16-i);
296
if(debug_enabled&&strchr(debug_opt, 'f')!=NULL)
297
msg_cprintf(0, M_HDF_FIX);
318
/* Sends i-th entry down the heap */
320
static void NEAR downheap(int i)
325
while((j=2*i)<=heapsize)
327
if(j<heapsize&&freq[heap[j]]>freq[heap[j+1]])
329
if(freq[k]<=freq[heap[j]])
337
/* Encodes a length table element */
339
static void NEAR make_code(int n, unsigned char *len, unsigned short FAR *code)
342
unsigned short start[18];
346
start[i+1]=(start[i]+len_cnt[i])<<1;
348
code[i]=start[len[i]]++;
351
/* Makes tree, calculates len[], returns root */
353
static int make_tree(int nparm, unsigned short *freqparm, unsigned char *lenparm, unsigned short FAR *codeparm)
363
for(i=0; i<st_n; i++)
374
for(i=heapsize>>1; i>=1; i--)
375
downheap(i); /* Make priority queue */
377
/* While queue has at least two entries */
380
i=heap[1]; /* Take out least-freq entry */
383
heap[1]=heap[heapsize--];
385
j=heap[1]; /* Next least-freq entry */
388
k=avail++; /* Generate new node */
389
freq[k]=freq[i]+freq[j];
391
downheap(1); /* Put into queue */
397
make_code(nparm, lenparm, codeparm);
398
return(k); /* Return root */
401
/* Counts the cumulative frequency */
410
while(n>0&&c_len[n-1]==0)
419
while(i<n&&c_len[i]==0)
441
/* Writes the encoded length */
443
void write_pt_len(int n, int nbit, int i_special)
447
while(n>0&&pt_len[n-1]==0)
459
putbits(k-3, 0xFFFE);
462
while(i<6&&pt_len[i]==0)
469
/* Writes character length */
476
while(n>0&&c_len[n-1]==0)
486
while(i<n&&c_len[i]==0)
493
for(k=0; k<count; k++)
494
putbits(pt_len[0], pt_code[0]);
498
putbits(pt_len[1], pt_code[1]);
503
putbits(pt_len[0], pt_code[0]);
504
putbits(pt_len[1], pt_code[1]);
509
putbits(pt_len[2], pt_code[2]);
510
putbits(CBIT, count-20);
514
putbits(pt_len[k+2], pt_code[k+2]);
518
/* Encodes a block and writes it to the output file */
522
unsigned int i, k, flags=0, root, pos, size;
527
root=make_tree(NC, c_freq, c_len, c_code);
533
root=make_tree(NT, t_freq, pt_len, (unsigned short FAR *)pt_code);
535
write_pt_len(NT, TBIT, 3);
550
root=make_tree(NP, p_freq, pt_len, (unsigned short FAR *)pt_code);
552
write_pt_len(NP, PBIT, -1);
559
for(i=0; i<size; i++)
567
if(flags&(1U<<(CHAR_BIT-1)))
569
c=(unsigned int)buf[pos++]+(1<<CHAR_BIT);
572
k|=(unsigned int)buf[pos++]<<CHAR_BIT;
588
/* Handy macro for retrieval of two bytes (don't care about the portability) */
590
#ifdef ALIGN_POINTERS
591
#define word_ptr(c) ( (((unsigned char *) (c))[0]<<8)+ ((unsigned char *) (c))[1] )
592
#define _diff(c1,c2) ( (c1)[0]!=(c2)[0] || (c1)[1]!=(c2)[1] )
593
#elif !defined(TILED)
594
#define word_ptr(c) (*(unsigned short *)(c))
597
/* Tree update routine. Possibly the most time-consuming one. */
599
static int upd_tree(int n_c)
608
mov es, word ptr dtree+2
683
jmp short ut_loopbody
713
mov es, word ptr dtree+2
752
unsigned char *tdptr;
753
unsigned char *prev_tptr, *prev_tdptr;
759
if((r_bx=dptr[n_c])>=0)
768
r_ax=word_ptr(tptr+r_bp-1);
778
} while(r_ax!=word_ptr(tdptr+r_bx-1));
781
#ifdef ALIGN_POINTERS
782
if (_diff(tptr,tdptr))
784
if(word_ptr(tptr)!=word_ptr(tdptr))
793
#ifdef ALIGN_POINTERS
794
if (_diff(tptr,tdptr))
796
if(word_ptr(tptr)!=word_ptr(tdptr))
802
diff=*(tdptr)-*(tptr);
803
remainder=tdptr-prev_tdptr;
810
if(tptr-tdptr<=dicsiz_cur)
821
return(tc_passes=r_bp);
825
/* Optimized output routine */
827
static void output(int c, unsigned short p)
829
unsigned char FAR *bptr;
837
output_mask=(cy?0x80:0)|((output_mask&0xFF)>>1);
859
buf[output_pos]|=output_mask;
862
for(r_bx=0; p!=0; p>>=1)
869
/* Unstubbed optimized output routine */
871
static void output_opt(unsigned char c)
873
unsigned char FAR *bptr, FAR *cptr;
880
output_mask=(cy?0x80:0)|((output_mask&0xFF)>>1);
903
/* Initializes memory for encoding */
905
void allocate_memory()
909
if((c_freq=calloc(NC*2-1, sizeof(*c_freq)))==NULL)
910
error(M_OUT_OF_NEAR_MEMORY);
911
if((c_code=farcalloc(NC, sizeof(*c_code)))==NULL)
912
error(M_OUT_OF_MEMORY);
913
if((heap=farcalloc(NC+1, sizeof(*heap)))==NULL)
914
error(M_OUT_OF_MEMORY);
918
bufsiz=current_bufsiz;
919
if(bufsiz>=MAX_BUFSIZ-BUFSIZ_INCREMENT)
921
#ifdef FINETUNE_BUFSIZ
924
/* Adjust the buffer size if there's not enough memory for it */
925
while((buf=farmalloc(bufsiz))==NULL)
927
#ifndef FINETUNE_BUFSIZ
928
bufsiz=bufsiz/10U*9U;
933
bufsiz=bufsiz/16U*15U;
935
if(bufsiz<MIN_BUFSIZ)
936
error(M_OUT_OF_MEMORY);
938
if(debug_enabled&&strchr(debug_opt, 'v')!=NULL)
939
msg_cprintf(0, M_BUFSIZ, bufsiz);
944
buf[output_pos]='\0';
948
/* Shutdown the encoding */
950
void NEAR huf_encode_end()
963
/* Basic encoding routine */
965
static void NEAR huf_encode()
968
unsigned short fp_max;
972
unsigned short t; /* Exchange variable */
974
unsigned int n_passes;
975
unsigned int f_dicpos;
977
unsigned int max_fetch; /* For comparison */
981
unsigned short r_cx, r_dx, r_ax;
984
hash_bits=(dicbit+2)/3;
989
if((tree=calloc(treesize+2, sizeof(*tree)))==NULL)
990
error(M_OUT_OF_NEAR_MEMORY);
992
ftree=farcalloc_based((unsigned long)treesize+16L, sizeof(*ftree));
993
dtree=(FP_OFF(ftree)==0)?ftree:MK_FP(FP_SEG(ftree)+((FP_OFF(ftree)+15)>>4), 0);
995
ftree=dtree=farcalloc((unsigned long)treesize+16L, sizeof(*ftree));
997
fpbuf=farcalloc((unsigned long)fpcount+4L, 2L);
998
if(ftree==NULL||fpbuf==NULL)
999
error(M_OUT_OF_MEMORY);
1004
nchars=(UCHAR_MAX+THRESHOLD)*2;
1005
display_indicator(0L);
1017
if((k=j-tree_el)<=0)
1023
memmove(tree, tree+k, tree_el);
1025
max_fetch=fetch=(unsigned int)(treesize-tree_el);
1026
if(multivolume_option)
1027
fetch=check_multivolume(fetch);
1028
if(max_fetch!=fetch)
1030
if((fetch=fetch_uncomp(tree+tree_el, fetch))==0)
1032
if(tree_el==0||k==0)
1034
memmove(tree+k, tree, tree_el);
1035
dicsiz_cur=min(tree_el-i-1, dicsiz_cur);
1039
encoded_bytes+=(unsigned long)fetch;
1040
display_indicator(encoded_bytes);
1047
for(l=fpcount>>2; l>0; l--)
1049
*fptr=max(*fptr-k, -1);
1051
*fptr=max(*fptr-k, -1);
1053
*fptr=max(*fptr-k, -1);
1055
*fptr=max(*fptr-k, -1);
1059
for(l=tree_el>>3; l>0; l--)
1061
*dtptr=max(dtptr[k]-k, -1);
1063
*dtptr=max(dtptr[k]-k, -1);
1065
*dtptr=max(dtptr[k]-k, -1);
1067
*dtptr=max(dtptr[k]-k, -1);
1069
*dtptr=max(dtptr[k]-k, -1);
1071
*dtptr=max(dtptr[k]-k, -1);
1073
*dtptr=max(dtptr[k]-k, -1);
1075
*dtptr=max(dtptr[k]-k, -1);
1078
/* Store the remainder */
1079
for(l=tree_el%8; l>0; l--)
1081
*dtptr=max(dtptr[k]-k, -1);
1089
r_dx=(unsigned short)*(tptr++);
1090
r_cx=(fp_max&0xFF00)|(hash_bits&0xFF);
1091
r_dx<<=(hash_bits&0xFF);
1092
r_dx^=(unsigned short)*(tptr++);
1094
for(l=j-2; m<l; m++)
1112
n_passes=(unsigned int)tc_passes;
1113
f_dicpos=(int)dicpos;
1114
if((r_ax=upd_tree(m))>i)
1116
if(n_passes<THRESHOLD||(n_passes==THRESHOLD&&f_dicpos>16384)||--r_ax>n_passes||(r_ax==n_passes&&dicpos>>1<f_dicpos))
1118
output_opt(tree[pm]);
1124
r_ax=n_passes+UCHAR_MAX-2;
1125
output(r_ax, f_dicpos);
1139
if((r_ax=upd_tree(m))>i)
1141
if(n_passes<THRESHOLD||(n_passes==THRESHOLD&&f_dicpos>16384)||--r_ax>n_passes||(r_ax==n_passes&&dicpos>>1<f_dicpos))
1143
output_opt(tree[pm]);
1149
r_ax=n_passes+UCHAR_MAX-2;
1150
output(r_ax, f_dicpos);
1151
if((r_ax=upd_tree(m))>i)
1157
farfree_based(ftree);
1166
/* Encoding stub for -m3 */
1168
static void NEAR huf_encode_m3()
1171
unsigned short fp_max;
1173
unsigned short t; /* Exchange variable */
1177
unsigned char *tptr;
1178
unsigned short r_cx, r_dx;
1181
hash_bits=(dicbit+2)/3;
1186
if((tree=calloc(treesize+2, sizeof(*tree)))==NULL)
1187
error(M_OUT_OF_NEAR_MEMORY);
1189
ftree=farcalloc_based((unsigned long)treesize+16L, sizeof(*ftree));
1190
dtree=(FP_OFF(ftree)==0)?ftree:MK_FP(FP_SEG(ftree)+((FP_OFF(ftree)+15)>>4), 0);
1192
ftree=dtree=farcalloc((unsigned long)treesize+16L, sizeof(*ftree));
1194
fpbuf=farcalloc((unsigned long)fpcount+4L, 2L);
1195
if(ftree==NULL||fpbuf==NULL)
1196
error(M_OUT_OF_MEMORY);
1199
display_indicator(encoded_bytes=0L);
1204
if(dic_alloc!=0&&j!=0)
1207
if((k=j-tree_el)<=0)
1213
memmove(tree, tree+k, tree_el);
1215
fetch=(unsigned int)(treesize-tree_el);
1216
if(multivolume_option)
1217
fetch=check_multivolume(fetch);
1218
if((fetch=fetch_uncomp(tree+tree_el, fetch))==0)
1223
encoded_bytes+=(unsigned long)fetch;
1224
display_indicator(encoded_bytes);
1228
r_dx=(unsigned short)*(tptr++);
1229
r_cx=(fp_max&0xFF00)|(hash_bits&0xFF);
1230
r_dx<<=(hash_bits&0xFF);
1231
r_dx^=(unsigned short)*(tptr++);
1247
if((r_ax=upd_tree(m))>i)
1251
r_ax=(unsigned short)tree[m];
1259
output(r_ax, dicpos);
1268
farfree_based(ftree);
1277
/* Encodes a single file. */
1279
void encode(int method)
1281
char *dsw; /* Debug switch (-hd) pointer */
1284
numchars=UCHAR_MAX+5;
1288
dicsiz_cur=DICSIZ-2;
1290
/* Method 0 (store) is already filtered away at this point */
1294
numchars=UCHAR_MAX+5;
1307
error(M_UNKNOWN_METHOD);
1309
switch(max_compression)
1313
dic_alloc=DICSIZ+512;
1317
dic_alloc=DICSIZ+512;
1323
dicsiz_cur=dic_alloc=16384;
1329
dicsiz_cur=dic_alloc=8192;
1339
case 'd': /* Dictionary size */
1340
dicsiz_cur=(unsigned int)strtol(dsw, &dsw, 10);
1342
case 'g': /* G-size */
1343
dic_alloc=(int)strtol(dsw, &dsw, 10);
1346
dicbit=(int)strtol(dsw, &dsw, 10);
1349
numchars=(int)strtol(dsw, &dsw, 10);
1352
treesize=(int)strtol(dsw, &dsw, 10);
1356
if(strchr(debug_opt, 'v')!=NULL)
1357
msg_cprintf(0, M_PRECOMP_STAT, numchars, dicbit, dicsiz_cur, dic_alloc, treesize);
1359
if(dicsiz_cur>DICSIZ_MAX)
1360
error(M_LARGE_DICTIONARY);
1361
if(dic_alloc>treesize)
1362
error(M_LARGE_GSIZE);
1369
/* Fast search stub for method 4 */
1371
static void enc4_pass1(int n_c)
1419
call far ptr putbits
1434
call far ptr putbits
1455
/* Dictionary position lookup */
1457
static void enc4_pass2(int n_c)
1505
call far ptr putbits
1521
call far ptr putbits
1525
unsigned short r_bx=1<<9;
1535
putbits(r_cx-9, -1);
1542
/* Encodes a single file, using method 4 */
1548
unsigned short fp_max;
1549
unsigned char *tptr;
1551
unsigned short r_cx, r_dx, r_ax;
1559
hash_bits=(dicbit+2)/3;
1564
if((tree=calloc(treesize+2, sizeof(*tree)))==NULL)
1565
error(M_OUT_OF_NEAR_MEMORY);
1567
ftree=farcalloc_based((unsigned long)treesize+16L, sizeof(*ftree));
1568
dtree=(FP_OFF(ftree)==0)?ftree:MK_FP(FP_SEG(ftree)+((FP_OFF(ftree)+15)>>4), 0);
1570
ftree=dtree=farcalloc((unsigned long)treesize+16L, sizeof(*ftree));
1572
fpbuf=farcalloc((unsigned long)fpcount+4L, 2L);
1573
if(ftree==NULL||fpbuf==NULL)
1574
error(M_OUT_OF_MEMORY);
1578
display_indicator(encoded_bytes=0L);
1582
if(multivolume_option)
1583
fetch=check_multivolume(fetch);
1584
if((fetch=fetch_uncomp(tree, fetch))==0)
1586
encoded_bytes+=(unsigned long)fetch;
1587
display_indicator(encoded_bytes);
1591
r_dx=(unsigned short)*(tptr++);
1592
r_cx=(fp_max&0xFF00)|(hash_bits&0xFF);
1593
r_dx<<=(hash_bits&0xFF);
1594
r_dx^=(unsigned short)*(tptr++);
1596
for(m=0; m<fetch; m++)
1599
r_dx^=(unsigned char)*(tptr++);
1609
if((r_ax=upd_tree(m))>i)
1613
putbits(9, (unsigned char)tree[m]);
1630
farfree_based(ftree);