2
; match.asm -- Pentium-Pro optimized version of longest_match()
4
; Updated for zlib 1.1.3 and converted to MASM 6.1x
5
; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com>
6
; and Chuck Walbourn <chuckw@kinesoft.com>
7
; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro>
9
; This is free software; you can redistribute it and/or modify it
10
; under the terms of the GNU General Public License.
13
; Written for zlib 1.1.2
14
; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
16
; Modified by Gilles Vollant (2005) for add gzhead and gzindex
21
;===========================================================================
23
;===========================================================================
27
MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1)
28
MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7))
30
;===========================================================================
32
;===========================================================================
34
; This STRUCT assumes a 4-byte alignment
40
ds_pending_buf_size dd ?
44
; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h)
65
ds_match_length dd ? ; used
66
ds_prev_match dd ? ; used
67
ds_match_available dd ?
68
ds_strstart dd ? ; used
69
ds_match_start dd ? ; used
70
ds_lookahead dd ? ; used
71
ds_prev_length dd ? ; used
72
ds_max_chain_length dd ? ; used
73
ds_max_laxy_match dd ?
76
ds_good_match dd ? ; used
77
ds_nice_match dd ? ; used
79
; Don't need anymore of the struct for match
82
;===========================================================================
84
;===========================================================================
87
;---------------------------------------------------------------------------
89
;---------------------------------------------------------------------------
93
; no initialization needed
97
;---------------------------------------------------------------------------
98
; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
99
;---------------------------------------------------------------------------
102
PUBLIC _longest_match
105
; Since this code uses EBP for a scratch register, the stack frame must
106
; be manually constructed and referenced relative to the ESP register.
110
chainlenwmask = 0 ; high word: current chain len
112
window = 4 ; local copy of s->window
113
windowbestlen = 8 ; s->window + bestlen
114
scanend = 12 ; last two bytes of string
115
scanstart = 16 ; first two bytes of string
116
scanalign = 20 ; dword-misalignment of string
117
nicematch = 24 ; a good enough match size
118
bestlen = 28 ; size of best match so far
119
scan = 32 ; ptr to string wanting match
120
varsize = 36 ; number of bytes (also offset to last saved register)
122
; Saved Registers (actually pushed into place)
133
; Save registers that the compiler may be using
139
; Allocate local variable space
142
; Retrieve the function arguments. ecx will hold cur_match
143
; throughout the entire function. edx will hold the pointer to the
144
; deflate_state structure during the function's setup (before
145
; entering the main loop).
147
mov edx, [esp+deflatestate]
148
ASSUME edx:PTR DEFLATE_STATE
150
mov ecx, [esp+curmatch]
152
; uInt wmask = s->w_mask;
153
; unsigned chain_length = s->max_chain_length;
154
; if (s->prev_length >= s->good_match) {
155
; chain_length >>= 2;
158
mov eax, [edx].ds_prev_length
159
mov ebx, [edx].ds_good_match
161
mov eax, [edx].ds_w_mask
162
mov ebx, [edx].ds_max_chain_length
163
jl SHORT LastMatchGood
167
; chainlen is decremented once beforehand so that the function can
168
; use the sign flag instead of the zero flag for the exit test.
169
; It is then shifted into the high word, to make room for the wmask
170
; value, which it will always accompany.
175
mov [esp+chainlenwmask], ebx
177
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
179
mov eax, [edx].ds_nice_match
180
mov ebx, [edx].ds_lookahead
182
jl SHORT LookaheadLess
185
mov [esp+nicematch], ebx
187
;/* register Bytef *scan = s->window + s->strstart; */
189
mov esi, [edx].ds_window
190
mov [esp+window], esi
191
mov ebp, [edx].ds_strstart
195
;/* Determine how many bytes the scan ptr is off from being */
196
;/* dword-aligned. */
201
mov [esp+scanalign], eax
203
;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */
204
;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */
206
mov eax, [edx].ds_w_size
207
sub eax, MIN_LOOKAHEAD
209
jg SHORT LimitPositive
213
;/* int best_len = s->prev_length; */
215
mov eax, [edx].ds_prev_length
216
mov [esp+bestlen], eax
218
;/* Store the sum of s->window + best_len in %esi locally, and in %esi. */
221
mov [esp+windowbestlen], esi
223
;/* register ush scan_start = *(ushf*)scan; */
224
;/* register ush scan_end = *(ushf*)(scan+best_len-1); */
225
;/* Posf *prev = s->prev; */
227
movzx ebx, WORD PTR[edi]
228
mov [esp+scanstart], ebx
229
movzx ebx, WORD PTR[eax+edi-1]
230
mov [esp+scanend], ebx
231
mov edi, [edx].ds_prev
233
;/* Jump into the main loop. */
235
mov edx, [esp+chainlenwmask]
239
; * match = s->window + cur_match;
240
; * if (*(ushf*)(match+best_len-1) != scan_end ||
241
; * *(ushf*)match != scan_start) continue;
243
; * } while ((cur_match = prev[cur_match & wmask]) > limit
244
; * && --chain_length != 0);
246
; * Here is the inner loop of the function. The function will spend the
247
; * majority of its time in this loop, and majority of that time will
248
; * be spent in the first ten instructions.
250
; * Within this loop:
253
; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
254
; * %esi = windowbestlen - i.e., (window + bestlen)
262
movzx ecx, WORD PTR[edi+ecx*2]
269
movzx eax, WORD PTR[esi+ecx-1]
273
mov eax, [esp+window]
274
movzx eax, WORD PTR[eax+ecx]
275
cmp eax, [esp+scanstart]
278
;/* Store the current value of chainlen. */
280
mov [esp+chainlenwmask], edx
282
;/* Point %edi to the string under scrutiny, and %esi to the string we */
283
;/* are hoping to match it up with. In actuality, %esi and %edi are */
284
;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */
285
;/* initialized to -(MAX_MATCH_8 - scanalign). */
287
mov esi, [esp+window]
290
mov eax, [esp+scanalign]
291
mov edx, -MAX_MATCH_8
292
lea edi, [edi+eax+MAX_MATCH_8]
293
lea esi, [esi+eax+MAX_MATCH_8]
295
;/* Test the strings for equality, 8 bytes at a time. At the end,
296
; * adjust %edx so that it is offset to the exact byte that mismatched.
298
; * We already know at this point that the first three bytes of the
299
; * strings match each other, and they can be safely passed over before
300
; * starting the compare loop. So what this code does is skip over 0-3
301
; * bytes, as much as necessary in order to dword-align the %edi
302
; * pointer. (%esi will still be misaligned three times out of four.)
304
; * It should be confessed that this loop usually does not represent
305
; * much of the total running time. Replacing it with a more
306
; * straightforward "rep cmpsb" would not drastically degrade
311
mov eax, DWORD PTR[esi+edx]
312
xor eax, DWORD PTR[edi+edx]
313
jnz SHORT LeaveLoopCmps
315
mov eax, DWORD PTR[esi+edx+4]
316
xor eax, DWORD PTR[edi+edx+4]
317
jnz SHORT LeaveLoopCmps4
338
;/* Calculate the length of the match. If it is longer than MAX_MATCH, */
339
;/* then automatically accept it as the best possible match and leave. */
347
;/* If the length of the match is not longer than the best match we */
348
;/* have so far, then forget it and return to the lookup loop. */
350
mov edx, [esp+deflatestate]
351
mov ebx, [esp+bestlen]
354
mov esi, [esp+windowbestlen]
355
mov edi, [edx].ds_prev
356
mov ebx, [esp+scanend]
357
mov edx, [esp+chainlenwmask]
361
;/* s->match_start = cur_match; */
362
;/* best_len = len; */
363
;/* if (len >= nice_match) break; */
364
;/* scan_end = *(ushf*)(scan+best_len-1); */
367
mov ebx, [esp+nicematch]
368
mov [esp+bestlen], eax
369
mov [edx].ds_match_start, ecx
372
mov esi, [esp+window]
374
mov [esp+windowbestlen], esi
375
movzx ebx, WORD PTR[edi+eax-1]
376
mov edi, [edx].ds_prev
377
mov [esp+scanend], ebx
378
mov edx, [esp+chainlenwmask]
382
;/* Accept the current string, with the maximum possible length. */
385
mov edx, [esp+deflatestate]
386
mov DWORD PTR[esp+bestlen], MAX_MATCH
387
mov [edx].ds_match_start, ecx
389
;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */
390
;/* return s->lookahead; */
393
mov edx, [esp+deflatestate]
394
mov ebx, [esp+bestlen]
395
mov eax, [edx].ds_lookahead
397
jg SHORT LookaheadRet
401
; Restore the stack and return from whence we came.