~ubuntu-branches/ubuntu/precise/pristine-tar/precise : revision 9

1

/* deflate.c -- compress data using the deflation algorithm

2

3

4

5

6

7

This program is free software; you can redistribute it and/or modify

8

it under the terms of the GNU General Public License as published by

9

the Free Software Foundation; either version 2, or (at your option)

10

any later version.

11

12

This program is distributed in the hope that it will be useful,

13

but WITHOUT ANY WARRANTY; without even the implied warranty of

14

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

15

GNU General Public License for more details.

16

17

You should have received a copy of the GNU General Public License

18

along with this program; if not, write to the Free Software Foundation,

19

Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */

20

21

/*

22

* PURPOSE

23

*

24

* Identify new text as repetitions of old text within a fixed-

25

* length sliding window trailing behind the new text.

26

*

27

* DISCUSSION

28

*

29

* The "deflation" process depends on being able to identify portions

30

* of the input text which are identical to earlier input (within a

31

* sliding window trailing behind the input currently being processed).

32

*

33

* The most straightforward technique turns out to be the fastest for

34

* most input files: try all possible matches and select the longest.

35

* The key feature of this algorithm is that insertions into the string

36

* dictionary are very simple and thus fast, and deletions are avoided

37

* completely. Insertions are performed at each input character, whereas

38

* string matches are performed only when the previous match ends. So it

39

* is preferable to spend more time in matches to allow very fast string

40

* insertions and avoid deletions. The matching algorithm for small

41

* strings is inspired from that of Rabin & Karp. A brute force approach

42

* is used to find longer strings when a small match has been found.

43

* A similar algorithm is used in comic (by Jan-Mark Wams) and freeze

44

* (by Leonid Broukhis).

45

* A previous version of this file used a more sophisticated algorithm

46

* (by Fiala and Greene) which is guaranteed to run in linear amortized

47

* time, but has a larger average cost, uses more memory and is patented.

48

* However the F&G algorithm may be faster for some highly redundant

49

* files if the parameter max_chain_length (described below) is too large.

50

*

51

* ACKNOWLEDGEMENTS

52

*

53

* The idea of lazy evaluation of matches is due to Jan-Mark Wams, and

54

* I found it in 'freeze' written by Leonid Broukhis.

55

* Thanks to many info-zippers for bug reports and testing.

56

*

57

* REFERENCES

58

*

59

* APPNOTE.TXT documentation file in PKZIP 1.93a distribution.

60

*

61

* A description of the Rabin and Karp algorithm is given in the book

62

* "Algorithms" by R. Sedgewick, Addison-Wesley, p252.

63

*

64

* Fiala,E.R., and Greene,D.H.

65

* Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-505

66

*

67

* INTERFACE

68

*

69

* void lm_init (int pack_level, ush *flags)

70

* Initialize the "longest match" routines for a new file

71

*

72

* void gnu_deflate (int pack_level, int rsync)

73

* Processes a new input file. Sets the compressed length, crc,

74

* deflate flags and internal file attributes.

75

*/

76

77

#include <stdio.h>

78

79

#include "gzip.h"

80

81

/* ===========================================================================
 * Configuration parameters
 */

#define HASH_BITS  15
/* Width of a hash key in bits */

#define HASH_SIZE ((unsigned)(1<<HASH_BITS))
/* Number of hash keys.  The expansion is fully parenthesized so that it
 * remains a single operand wherever it is substituted (e.g. after sizeof).
 */
#define HASH_MASK (HASH_SIZE-1)
#define WMASK     (WSIZE-1)
/* HASH_SIZE and WSIZE must be powers of two */

#define NIL 0
/* Tail of hash chains */

#define FAST 4
#define SLOW 2
/* speed options for the general purpose bit flag */

#ifndef TOO_FAR
#  define TOO_FAR 4096
#endif
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */

#ifndef RSYNC_WIN
#  define RSYNC_WIN 4096
#endif
/* Size of rsync window, must be < MAX_DIST */

#define RSYNC_SUM_MATCH(sum) ((sum) % RSYNC_WIN == 0)
/* Whether window sum matches magic value */

111

112

/* ===========================================================================

113

* Local data used by the "longest match" routines.

114

*/

115

116

typedef ush Pos;

117

typedef unsigned IPos;

118

/* A Pos is an index in the character window. We use short instead of int to

119

* save space in the various tables. IPos is used only for parameter passing.

120

*/

121

122

static uch window[2L*WSIZE];

123

/* Sliding window. Input bytes are read into the second half of the window,

124

* and move to the first half later to keep a dictionary of at least WSIZE

125

* bytes. With this organization, matches are limited to a distance of

126

* WSIZE-MAX_MATCH bytes, but this ensures that IO is always

127

* performed with a length multiple of the block size. Also, it limits

128

* the window size to 64K, which is quite useful on MSDOS.

129

*/

130

131

Pos prev[WSIZE];

132

/* Link to older string with same hash index. To limit the size of this

133

* array to 64K, this link is maintained only for the last 32K strings.

134

* An index in this array is thus a window index modulo 32K.

135

*/

136

137

Pos head[1<<HASH_BITS];

138

/* Heads of the hash chains or NIL. */

139

140

ulg window_size = (ulg)2*WSIZE;

141

/* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the

142

* input file length plus MIN_LOOKAHEAD.

143

*/

144

145

long block_start;

146

/* window position at the beginning of the current output block. Gets

147

* negative when the window is moved backwards.

148

*/

149

150

static unsigned ins_h; /* hash index of string to be inserted */

151

152

#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)

153

/* Number of bits by which ins_h and del_h must be shifted at each

154

* input step. It must be such that after MIN_MATCH steps, the oldest

155

* byte no longer takes part in the hash key, that is:

156

* H_SHIFT * MIN_MATCH >= HASH_BITS

157

*/

158

159

unsigned int prev_length;

160

/* Length of the best match at previous step. Matches not greater than this

161

* are discarded. This is used in the lazy match evaluation.

162

*/

163

164

unsigned strstart; /* start of string to insert */

165

unsigned match_start; /* start of matching string */

166

static int eofile; /* flag set at end of input file */

167

static unsigned lookahead; /* number of valid bytes ahead in window */

168

169

unsigned max_chain_length;

170

/* To speed up deflation, hash chains are never searched beyond this length.

171

* A higher limit improves compression ratio but degrades the speed.

172

*/

173

174

static unsigned int max_lazy_match;

175

/* Attempt to find a better match only when the current match is strictly

176

* smaller than this value. This mechanism is used only for compression

177

* levels >= 4.

178

*/

179

#define max_insert_length max_lazy_match

180

/* Insert new strings in the hash table only if the match length

181

* is not greater than this length. This saves time but degrades compression.

182

* max_insert_length is used only for compression levels <= 3.

183

*/

184

185

static unsigned good_match;

186

/* Use a faster search when the previous match is longer than this */

187

188

static ulg rsync_sum; /* rolling sum of rsync window */

189

static ulg rsync_chunk_end; /* next rsync sequence point */

190

191

/* Values for max_lazy_match, good_match and max_chain_length, depending on

192

* the desired pack level (0..9). The values given below have been tuned to

193

* exclude worst case performance for pathological files. Better values may be

194

* found for specific files.

195

*/

196

197

typedef struct config {

198

ush good_length; /* reduce lazy search above this match length */

199

ush max_lazy; /* do not perform lazy search above this match length */

200

ush nice_length; /* quit search above this match length */

201

ush max_chain;

202

} config;

203

204

#ifdef FULL_SEARCH

205

# define nice_match MAX_MATCH

206

#else

207

int nice_match; /* Stop searching when current match exceeds this */

208

#endif

209

210

static config configuration_table[10] = {

211

/* good lazy nice chain */

212

/* 0 */ {0, 0, 0, 0}, /* store only */

213

/* 1 */ {4, 4, 8, 4}, /* maximum speed, no lazy matches */

214

/* 2 */ {4, 5, 16, 8},

215

/* 3 */ {4, 6, 32, 32},

216

217

/* 4 */ {4, 4, 16, 16}, /* lazy matches */

218

/* 5 */ {8, 16, 32, 32},

219

/* 6 */ {8, 16, 128, 128},

220

/* 7 */ {8, 32, 128, 256},

221

/* 8 */ {32, 128, 258, 1024},

222

/* 9 */ {32, 258, 258, 4096}}; /* maximum compression */

223

224

/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4

225

* For deflate_fast() (levels <= 3) good is ignored and lazy has a different

226

* meaning.

227

*/

228

229

#define EQUAL 0

230

/* result of memcmp for equal strings */

231

232

/* ===========================================================================

233

* Prototypes for local functions.

234

*/

235

static void fill_window(void);

236

237

int longest_match(IPos cur_match);

238

239

/* ===========================================================================
 * Fold the input byte c into the running hash value h: h is shifted left by
 * H_SHIFT bits, XOR-ed with c and reduced with HASH_MASK.  The result is
 * stored back into h, which must therefore be an lvalue.
 * IN assertion: all calls to UPDATE_HASH are made with consecutive
 *    input characters, so that a running hash key can be computed from the
 *    previous key instead of complete recalculation each time.
 */
#define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)

/* ===========================================================================
 * Insert string s in the dictionary and set match_head to the previous head
 * of the hash chain (the most recent string with same hash key).
 * In order, the macro:
 *   - advances ins_h with the byte at window[s + MIN_MATCH-1];
 *   - stores the old head[ins_h] in both match_head and prev[s & WMASK];
 *   - makes s the new head of the chain.
 * s is expanded more than once, so it must not have side effects.
 * IN assertion: all calls to INSERT_STRING are made with consecutive
 *    input characters and the first MIN_MATCH bytes of s are valid
 *    (except for the last MIN_MATCH-1 bytes of the input file).
 */
#define INSERT_STRING(s, match_head) \
   (UPDATE_HASH(ins_h, window[(s) + MIN_MATCH-1]), \
    prev[(s) & WMASK] = match_head = head[ins_h], \
    head[ins_h] = (s))

259

260

/* ===========================================================================

261

* Initialize the "longest match" routines for a new file

262

*/

263

void lm_init (int pack_level, /* 1: best speed, 9: best compression */

264

ush *flags) /* general purpose bit flag */

265

{

266

register unsigned j;

267

268

if (pack_level < 1 || pack_level > 9) gzip_error ("bad pack level");

269

270

/* Initialize the hash table. */

271

memzero((char*)head, HASH_SIZE*sizeof(*head));

272

/* prev will be initialized on the fly */

273

274

/* rsync params */

275

rsync_chunk_end = 0xFFFFFFFFUL;

276

rsync_sum = 0;

277

278

/* Set the default configuration parameters:

279

*/

280

max_lazy_match = configuration_table[pack_level].max_lazy;

281

good_match = configuration_table[pack_level].good_length;

282

#ifndef FULL_SEARCH

283

nice_match = configuration_table[pack_level].nice_length;

284

#endif

285

max_chain_length = configuration_table[pack_level].max_chain;

286

if (pack_level == 1) {

287

*flags |= FAST;

288

} else if (pack_level == 9) {

289

*flags |= SLOW;

290

}

291

/* ??? reduce max_chain_length for binary files */

292

293

strstart = 0;

294

block_start = 0L;

295

296

lookahead = read_buf((char*)window,

297

sizeof(int) <= 2 ? (unsigned)WSIZE : 2*WSIZE);

298

299

if (lookahead == 0 || lookahead == (unsigned)EOF) {

300

eofile = 1, lookahead = 0;

301

return;

302

}

303

eofile = 0;

304

/* Make sure that we always have enough lookahead. This is important

305

* if input comes from a device such as a tty.

306

*/

307

while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window();

308

309

ins_h = 0;

310

for (j=0; j<MIN_MATCH-1; j++) UPDATE_HASH(ins_h, window[j]);

311

/* If lookahead < MIN_MATCH, ins_h is garbage, but this is

312

* not important since only literal bytes will be emitted.

313

*/

314

}

315

316

/* ===========================================================================

317

* Set match_start to the longest match starting at the given string and

318

* return its length. Matches shorter or equal to prev_length are discarded,

319

* in which case the result is equal to prev_length and match_start is

320

* garbage.

321

* IN assertions: cur_match is the head of the hash chain for the current

322

* string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1

323

*/

324

int longest_match(IPos cur_match)

325

{

326

unsigned chain_length = max_chain_length; /* max hash chain length */

327

register uch *scan = window + strstart; /* current string */

328

register uch *match; /* matched string */

329

register int len; /* length of current match */

330

int best_len = prev_length; /* best match length so far */

331

IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST : NIL;

332

/* Stop when cur_match becomes <= limit. To simplify the code,

333

* we prevent matches with the string of window index 0.

334

*/

335

336

/* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.

337

* It is easy to get rid of this optimization if necessary.

338

*/

339

#if HASH_BITS < 8 || MAX_MATCH != 258

340

error: Code too clever

341

#endif

342

343

#ifdef UNALIGNED_OK

344

/* Compare two bytes at a time. Note: this is not always beneficial.

345

* Try with and without -DUNALIGNED_OK to check.

346

*/

347

register uch *strend = window + strstart + MAX_MATCH - 1;

348

register ush scan_start = *(ush*)scan;

349

register ush scan_end = *(ush*)(scan+best_len-1);

350

#else

351

register uch *strend = window + strstart + MAX_MATCH;

352

register uch scan_end1 = scan[best_len-1];

353

register uch scan_end = scan[best_len];

354

#endif

355

356

/* Do not waste too much time if we already have a good match: */

357

if (prev_length >= good_match) {

358

chain_length >>= 2;

359

}

360

Assert(strstart <= window_size-MIN_LOOKAHEAD, "insufficient lookahead");

361

362

do {

363

Assert(cur_match < strstart, "no future");

364

match = window + cur_match;

365

366

/* Skip to next match if the match length cannot increase

367

* or if the match length is less than 2:

368

*/

369

#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)

370

/* This code assumes sizeof(unsigned short) == 2. Do not use

371

* UNALIGNED_OK if your compiler uses a different size.

372

*/

373

if (*(ush*)(match+best_len-1) != scan_end ||

374

*(ush*)match != scan_start) continue;

375

376

/* It is not necessary to compare scan[2] and match[2] since they are

377

* always equal when the other bytes match, given that the hash keys

378

* are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at

379

* strstart+3, +5, ... up to strstart+257. We check for insufficient

380

* lookahead only every 4th comparison; the 128th check will be made

381

* at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is

382

* necessary to put more guard bytes at the end of the window, or

383

* to check more often for insufficient lookahead.

384

*/

385

scan++, match++;

386

do {

387

} while (*(ush*)(scan+=2) == *(ush*)(match+=2) &&

388

*(ush*)(scan+=2) == *(ush*)(match+=2) &&

389

*(ush*)(scan+=2) == *(ush*)(match+=2) &&

390

*(ush*)(scan+=2) == *(ush*)(match+=2) &&

391

scan < strend);

392

/* The funny "do {}" generates better code on most compilers */

393

394

/* Here, scan <= window+strstart+257 */

395

Assert(scan <= window+(unsigned)(window_size-1), "wild scan");

396

if (*scan == *match) scan++;

397

398

len = (MAX_MATCH - 1) - (int)(strend-scan);

399

scan = strend - (MAX_MATCH-1);

400

401

#else /* UNALIGNED_OK */

402

403

if (match[best_len] != scan_end ||

404

match[best_len-1] != scan_end1 ||

405

*match != *scan ||

406

*++match != scan[1]) continue;

407

408

/* The check at best_len-1 can be removed because it will be made

409

* again later. (This heuristic is not always a win.)

410

* It is not necessary to compare scan[2] and match[2] since they

411

* are always equal when the other bytes match, given that

412

* the hash keys are equal and that HASH_BITS >= 8.

413

*/

414

scan += 2, match++;

415

416

/* We check for insufficient lookahead only every 8th comparison;

417

* the 256th check will be made at strstart+258.

418

*/

419

do {

420

} while (*++scan == *++match && *++scan == *++match &&

421

*++scan == *++match && *++scan == *++match &&

422

*++scan == *++match && *++scan == *++match &&

423

*++scan == *++match && *++scan == *++match &&

424

scan < strend);

425

426

len = MAX_MATCH - (int)(strend - scan);

427

scan = strend - MAX_MATCH;

428

429

#endif /* UNALIGNED_OK */

430

431

if (len > best_len) {

432

match_start = cur_match;

433

best_len = len;

434

if (len >= nice_match) break;

435

#ifdef UNALIGNED_OK

436

scan_end = *(ush*)(scan+best_len-1);

437

#else

438

scan_end1 = scan[best_len-1];

439

scan_end = scan[best_len];

440

#endif

441

}

442

} while ((cur_match = prev[cur_match & WMASK]) > limit

443

&& --chain_length != 0);

444

445

return best_len;

446

}

447

448

/* ===========================================================================

449

* Fill the window when the lookahead becomes insufficient.

450

* Updates strstart and lookahead, and sets eofile if end of input file.

451

* IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0

452

* OUT assertions: at least one byte has been read, or eofile is set;

453

* file reads are performed for at least two bytes (required for the

454

* translate_eol option).

455

*/

456

static void fill_window(void)

457

{

458

register unsigned n, m;

459

unsigned more = (unsigned)(window_size - (ulg)lookahead - (ulg)strstart);

460

/* Amount of free space at the end of the window. */

461

462

/* If the window is almost full and there is insufficient lookahead,

463

* move the upper half to the lower one to make room in the upper half.

464

*/

465

if (more == (unsigned)EOF) {

466

/* Very unlikely, but possible on 16 bit machine if strstart == 0

467

* and lookahead == 1 (input done one byte at time)

468

*/

469

more--;

470

} else if (strstart >= WSIZE+MAX_DIST) {

471

/* By the IN assertion, the window is not empty so we can't confuse

472

* more == 0 with more == 64K on a 16 bit machine.

473

*/

474

Assert(window_size == (ulg)2*WSIZE, "no sliding with BIG_MEM");

475

476

memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE);

477

match_start -= WSIZE;

478

strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */

479

if (rsync_chunk_end != 0xFFFFFFFFUL)

480

rsync_chunk_end -= WSIZE;

481

482

block_start -= (long) WSIZE;

483

484

for (n = 0; n < HASH_SIZE; n++) {

485

m = head[n];

486

head[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);

487

}

488

for (n = 0; n < WSIZE; n++) {

489

m = prev[n];

490

prev[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);

491

/* If n is not on any hash chain, prev[n] is garbage but

492

* its value will never be used.

493

*/

494

}

495

more += WSIZE;

496

}

497

/* At this point, more >= 2 */

498

if (!eofile) {

499

n = read_buf((char*)window+strstart+lookahead, more);

500

if (n == 0 || n == (unsigned)EOF) {

501

eofile = 1;

502

} else {

503

lookahead += n;

504

}

505

}

506

}

507

508

static void rsync_roll(unsigned start, unsigned num)

509

{

510

unsigned i;

511

512

if (start < RSYNC_WIN) {

513

/* before window fills. */

514

for (i = start; i < RSYNC_WIN; i++) {

515

if (i == start + num) return;

516

rsync_sum += (ulg)window[i];

517

}

518

num -= (RSYNC_WIN - start);

519

start = RSYNC_WIN;

520

}

521

522

/* buffer after window full */

523

for (i = start; i < start+num; i++) {

524

/* New character in */

525

rsync_sum += (ulg)window[i];

526

/* Old character out */

527

rsync_sum -= (ulg)window[i - RSYNC_WIN];

528

if (rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum))

529

rsync_chunk_end = i;

530

}

531

}

532

533

/* ===========================================================================
 * Roll the rsync sum over n window bytes starting at s, which sets
 * rsync_chunk_end if the window sum matches the magic value.  Nothing is
 * done unless the variable named rsync, taken from the scope where the
 * macro is expanded, is non-zero.
 */
#define RSYNC_ROLL(s, n) \
   do { if (rsync) rsync_roll((s), (n)); } while(0)

/* ===========================================================================
 * Flush the current block, with given end-of-file flag.
 * The block handed to flush_block() runs from block_start to strstart; a
 * NULL buffer is passed when block_start is negative.  The third argument
 * is flush-1, where flush is a variable taken from the scope where the
 * macro is expanded.
 * IN assertion: strstart is set to the end of the current match.
 */
#define FLUSH_BLOCK(eof) \
   flush_block(block_start >= 0L ? (char*)&window[(unsigned)block_start] : \
                (char*)NULL, (long)strstart - block_start, flush-1, (eof))

546

547

/* ===========================================================================

548

* Processes a new input file and return its compressed length. This

549

* function does not perform lazy evaluationof matches and inserts

550

* new strings in the dictionary only for unmatched strings or for short

551

* matches. It is used only for the fast compression options.

552

*/

553

static void deflate_fast(int pack_level, int rsync)

554

{

555

IPos hash_head; /* head of the hash chain */

556

int flush; /* set if current block must be flushed, 2=>and padded */

557

unsigned match_length = 0; /* length of best match */

558

559

prev_length = MIN_MATCH-1;

560

while (lookahead != 0) {

561

/* Insert the string window[strstart .. strstart+2] in the

562

* dictionary, and set hash_head to the head of the hash chain:

563

*/

564

INSERT_STRING(strstart, hash_head);

565

566

/* Find the longest match, discarding those <= prev_length.

567

* At this point we have always match_length < MIN_MATCH

568

*/

569

if (hash_head != NIL && strstart - hash_head <= MAX_DIST

570

&& strstart <= window_size - MIN_LOOKAHEAD) {

571

/* To simplify the code, we prevent matches with the string

572

* of window index 0 (in particular we have to avoid a match

573

* of the string with itself at the start of the input file).

574

*/

575

match_length = longest_match (hash_head);

576

/* longest_match() sets match_start */

577

if (match_length > lookahead) match_length = lookahead;

578

}

579

if (match_length >= MIN_MATCH) {

580

flush = ct_tally(pack_level, strstart-match_start, match_length - MIN_MATCH);

581

582

lookahead -= match_length;

583

584

RSYNC_ROLL(strstart, match_length);

585

/* Insert new strings in the hash table only if the match length

586

* is not too large. This saves time but degrades compression.

587

*/

588

if (match_length <= max_insert_length) {

589

match_length--; /* string at strstart already in hash table */

590

do {

591

strstart++;

592

INSERT_STRING(strstart, hash_head);

593

/* strstart never exceeds WSIZE-MAX_MATCH, so there are

594

* always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH

595

* these bytes are garbage, but it does not matter since

596

* the next lookahead bytes will be emitted as literals.

597

*/

598

} while (--match_length != 0);

599

strstart++;

600

} else {

601

strstart += match_length;

602

match_length = 0;

603

ins_h = window[strstart];

604

UPDATE_HASH(ins_h, window[strstart+1]);

605

#if MIN_MATCH != 3

606

Call UPDATE_HASH() MIN_MATCH-3 more times

607

#endif

608

}

609

} else {

610

/* No match, output a literal byte */

611

Tracevv((stderr,"%c",window[strstart]));

612

flush = ct_tally (pack_level, 0, window[strstart]);

613

RSYNC_ROLL(strstart, 1);

614

lookahead--;

615

strstart++;

616

}

617

if (rsync && strstart > rsync_chunk_end) {

618

rsync_chunk_end = 0xFFFFFFFFUL;

619

flush = 2;

620

}

621

if (flush) FLUSH_BLOCK(0), block_start = strstart;

622

623

/* Make sure that we always have enough lookahead, except

624

* at the end of the input file. We need MAX_MATCH bytes

625

* for the next match, plus MIN_MATCH bytes to insert the

626

* string following the next match.

627

*/

628

while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window();

629

630

}

631

FLUSH_BLOCK(1); /* eof */

632

}

633

634

/* ===========================================================================

635

* Same as above, but achieves better compression. We use a lazy

636

* evaluation for matches: a match is finally adopted only if there is

637

* no better match at the next window position.

638

*/

639

void gnu_deflate(int pack_level, int rsync)

640

{

641

IPos hash_head; /* head of hash chain */

642

IPos prev_match; /* previous match */

643

int flush = 0; /* set if current block must be flushed */

644

int match_available = 0; /* set if previous match exists */

645

register unsigned match_length = MIN_MATCH-1; /* length of best match */

646

647

if (pack_level <= 3) {

648

deflate_fast(pack_level, rsync); /* optimized for speed */

649

return;

650

}

651

652

/* Process the input block. */

653

while (lookahead != 0) {

654

/* Insert the string window[strstart .. strstart+2] in the

655

* dictionary, and set hash_head to the head of the hash chain:

656

*/

657

INSERT_STRING(strstart, hash_head);

658

659

/* Find the longest match, discarding those <= prev_length.

660

*/

661

prev_length = match_length, prev_match = match_start;

662

match_length = MIN_MATCH-1;

663

664

if (hash_head != NIL && prev_length < max_lazy_match &&

665

strstart - hash_head <= MAX_DIST &&

666

strstart <= window_size - MIN_LOOKAHEAD) {

667

/* To simplify the code, we prevent matches with the string

668

* of window index 0 (in particular we have to avoid a match

669

* of the string with itself at the start of the input file).

670

*/

671

match_length = longest_match (hash_head);

672

/* longest_match() sets match_start */

673

if (match_length > lookahead) match_length = lookahead;

674

675

/* Ignore a length 3 match if it is too distant: */

676

if (match_length == MIN_MATCH && strstart-match_start > TOO_FAR){

677

/* If prev_match is also MIN_MATCH, match_start is garbage

678

* but we will ignore the current match anyway.

679

*/

680

match_length--;

681

}

682

}

683

/* If there was a match at the previous step and the current

684

* match is not better, output the previous match:

685

*/

686

if (prev_length >= MIN_MATCH && match_length <= prev_length) {

687

flush = ct_tally(pack_level, strstart-1-prev_match, prev_length - MIN_MATCH);

688

689

/* Insert in hash table all strings up to the end of the match.

690

* strstart-1 and strstart are already inserted.

691

*/

692

lookahead -= prev_length-1;

693

prev_length -= 2;

694

RSYNC_ROLL(strstart, prev_length+1);

695

do {

696

strstart++;

697

INSERT_STRING(strstart, hash_head);

698

/* strstart never exceeds WSIZE-MAX_MATCH, so there are

699

* always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH

700

* these bytes are garbage, but it does not matter since the

701

* next lookahead bytes will always be emitted as literals.

702

*/

703

} while (--prev_length != 0);

704

match_available = 0;

705

match_length = MIN_MATCH-1;

706

strstart++;

707

708

if (rsync && strstart > rsync_chunk_end) {

709

rsync_chunk_end = 0xFFFFFFFFUL;

710

flush = 2;

711

}

712

if (flush) FLUSH_BLOCK(0), block_start = strstart;

713

} else if (match_available) {

714

/* If there was no match at the previous position, output a

715

* single literal. If there was a match but the current match

716

* is longer, truncate the previous match to a single literal.

717

*/

718

Tracevv((stderr,"%c",window[strstart-1]));

719

flush = ct_tally (pack_level, 0, window[strstart-1]);

720

if (rsync && strstart > rsync_chunk_end) {

721

rsync_chunk_end = 0xFFFFFFFFUL;

722

flush = 2;

723

}

724

if (flush) FLUSH_BLOCK(0), block_start = strstart;

725

RSYNC_ROLL(strstart, 1);

726

strstart++;

727

lookahead--;

728

} else {

729

/* There is no previous match to compare with, wait for

730

* the next step to decide.

731

*/

732

if (rsync && strstart > rsync_chunk_end) {

733

/* Reset huffman tree */

734

rsync_chunk_end = 0xFFFFFFFFUL;

735

flush = 2;

736

FLUSH_BLOCK(0), block_start = strstart;

737

}

738

match_available = 1;

739

RSYNC_ROLL(strstart, 1);

740

strstart++;

741

lookahead--;

742

}

743

/* Assert (strstart <= bytes_in && lookahead <= bytes_in, "a bit too far"); */

744

745

/* Make sure that we always have enough lookahead, except

746

* at the end of the input file. We need MAX_MATCH bytes

747

* for the next match, plus MIN_MATCH bytes to insert the

748

* string following the next match.

749

*/

750

while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window();

751

}

752

if (match_available) ct_tally (pack_level, 0, window[strstart-1]);

753

754

FLUSH_BLOCK(1); /* eof */

755

}