~ubuntu-branches/ubuntu/edgy/agrep/edgy

« back to all changes in this revision

Viewing changes to newmgrep.c

  • Committer: Bazaar Package Importer
  • Author(s): Daniel Baumann
  • Date: 2005-12-27 17:01:00 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20051227170100-nk2hnq0bnlkbk3q3
Tags: 4.17-2
Added patch to fix FTBS on amd64 (Closes: #344909).

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. */
 
2
 
 
3
/* multipattern matcher */
 
4
 
 
5
#include <stdio.h>
 
6
#include <ctype.h>
 
7
#include <errno.h>
 
8
#ifdef ultrix
 
9
#include <sys/types.h>
 
10
#endif
 
11
#include <sys/stat.h>
 
12
#include "agrep.h"
 
13
#include <sys/time.h>
 
14
 
 
15
 
 
16
/*
 * Compile-time limits and constants for the multi-pattern matcher.
 * NOTE(review): the debug printf in prepf() is guarded by "#ifdef debug";
 * "ddebug" presumably exists only to keep that guard switched off -- confirm.
 */
#define ddebug
 
17
/* shorthand used for every byte buffer and table in this file */
#define uchar unsigned char
 
18
#undef  MAXPAT
 
19
/* prepf() uses this as the starting value of the minimum pattern length and
   as extra slack when sizing the pattern spool */
#define MAXPAT  256
 
20
#undef  MAXLINE
 
21
#define MAXLINE 1024
 
22
#undef  MAXSYM
 
23
/* number of entries in the character translation tables tr[] and tr1[] */
#define MAXSYM  256
 
24
/* number of entries in the SHIFT1[] table */
#define MAXMEMBER1 32768
 
25
/* #define MAXMEMBER1 262144 */ /*2^18 */ 
 
26
/* largest pattern file prepf() accepts, in bytes (see its error message) */
#define MAXPATFILE 600000
 
27
#define BLOCKSIZE  16384
 
28
/* number of entries in the HASH[] table */
#define MAXHASH    32768 
 
29
/* #define MAXHASH    262144 */
 
30
/* NOTE(review): equals MAXHASH-1; presumably the mask applied to hash values -- confirm at the use sites */
#define mask5      32767
 
31
/* capacity of the per-pattern arrays (patt[], pat_len[], ...); prepf() reports
   it as the maximum number of patterns */
#define max_num    MAX_DASHF_FILES
 
32
/* word delimiter symbol; in the visible code it is referenced only from the
   disabled WORDBOUND blocks */
#if     ISO_CHAR_SET
 
33
#define W_DELIM    256
 
34
#else
 
35
#define W_DELIM    128
 
36
#endif
 
37
/* line delimiter (ASCII 10, i.e. '\n'); prepf() wraps each pattern in it when WHOLELINE is set */
#define L_DELIM    10 
 
38
#define Hbits 5 /* how much to shift to perform the hash */
 
39
 
 
40
extern char aduplicates[MAXNUM_PAT][MAXNUM_PAT];        /* tells what other patterns are exactly equal to the i-th one */
 
41
extern char tc_aduplicates[MAXNUM_PAT][MAXNUM_PAT];     /* tells what other patterns are exactly equal to the i-th one */
 
42
extern  ParseTree aterminals[MAXNUM_PAT];
 
43
extern int      AComplexBoolean;
 
44
extern int LIMITOUTPUT, LIMITPERFILE;
 
45
extern int BYTECOUNT, PRINTOFFSET, PRINTRECORD, CurrentByteOffset;
 
46
extern int MULTI_OUTPUT;        /* used by glimpse only if OR, never for AND */
 
47
extern int DELIMITER;
 
48
extern CHAR D_pattern[MaxDelimit*2];
 
49
extern int D_length;
 
50
extern CHAR tc_D_pattern[MaxDelimit*2];
 
51
extern int tc_D_length;
 
52
extern COUNT, FNAME, SILENT, FILENAMEONLY, prev_num_of_matched, num_of_matched, PRINTFILETIME;
 
53
extern INVERSE, OUTTAIL;
 
54
extern WORDBOUND, WHOLELINE, NOUPPER;
 
55
extern ParseTree *AParse;
 
56
extern int AComplexPattern;
 
57
extern unsigned char  CurrentFileName[], Progname[]; 
 
58
extern long CurrentFileTime;
 
59
extern total_line;
 
60
extern agrep_initialfd;
 
61
extern int EXITONERROR;
 
62
extern int PRINTPATTERN;
 
63
extern int agrep_inlen;
 
64
extern CHAR *agrep_inbuffer;
 
65
extern FILE *agrep_finalfp;
 
66
extern int agrep_outpointer;
 
67
extern int agrep_outlen;
 
68
extern CHAR * agrep_outbuffer;
 
69
extern int errno;
 
70
extern int NEW_FILE, POST_FILTER;
 
71
 
 
72
extern int tuncompressible();
 
73
extern int quick_tcompress();
 
74
extern int quick_tuncompress();
 
75
extern int TCOMPRESSED;
 
76
extern int EASYSEARCH;
 
77
extern char FREQ_FILE[MAX_LINE_LEN], HASH_FILE[MAX_LINE_LEN], STRING_FILE[MAX_LINE_LEN];
 
78
extern char PAT_FILE_NAME[MAX_LINE_LEN];
 
79
 
 
80
/*
 * State of the (uncompressed) multi-pattern matcher.  Everything below is
 * (re)initialised by prepf().
 */
/* shift table; prepf() fills every entry with p_size - 1 - LONG before calling f_prep() */
uchar SHIFT1[MAXMEMBER1];
 
81
 
 
82
/* set to 1 by prepf() when the pattern input exceeds 400 bytes and p_size > 2 */
int   LONG  = 0;
 
83
/* set to 1 by prepf() when the shortest pattern has length 1; mgrep() then uses m_short() instead of monkey1() */
int   SHORT = 0;
 
84
/* length of the shortest non-empty pattern */
int   p_size= 0;
 
85
 
 
86
/* character translation table: identity, with upper case folded to lower case when NOUPPER is set */
uchar tr[MAXSYM];
 
87
/* tr[] masked with 31 (low five bits only) */
uchar tr1[MAXSYM];
 
88
/* hash table, zeroed by prepf() before f_prep() runs */
int   HASH[MAXHASH];
 
89
int   Hash2[max_num];
 
90
uchar *PatPtr[max_num];
 
91
/* heap buffer holding all NUL-terminated patterns back to back; reallocated on every prepf() call */
uchar *pat_spool = NULL; /* [MAXPATFILE+2*max_num+MAXPAT]; */
 
92
/* patt[i] points at the i-th pattern inside pat_spool; indices start at 1 */
uchar *patt[max_num];
 
93
/* pat_len[i] is the length of patt[i] after escape processing */
int   pat_len[max_num];
 
94
int   pat_indices[max_num]; /* pat_indices[p] gives the actual index in amatched_terminals: used only with AParse != 0 */
 
95
/* number of patterns currently loaded (valid indices are 1..num_pat) */
int num_pat;
 
96
 
 
97
extern char  amatched_terminals[MAXNUM_PAT]; /* which patterns have been matched in the current line? Used only with AParse != 0, so max_num is not needed */
 
98
extern int anum_terminals;
 
99
extern int AComplexBoolean;
 
100
static void countline();
 
101
void acompute_duplicates();
 
102
#if     DOTCOMPRESSED
 
103
/* Equivalent variables for compression search */
/* Each tc_X below mirrors the variable X of the normal matcher and is set up by tc_prepf(). */
 
104
/* shift table for the compressed patterns */
uchar tc_SHIFT1[MAXMEMBER1];
 
105
 
 
106
/* set by tc_prepf() when the pattern input exceeds 400 bytes and tc_p_size > 2 */
int   tc_LONG  = 0;
 
107
/* set by tc_prepf() when the shortest compressed pattern has length 1 */
int   tc_SHORT = 0;
 
108
/* length of the shortest non-empty compressed pattern */
int   tc_p_size= 0;
 
109
 
 
110
/* identity translation table: tc_prepf() ignores the case/word options */
uchar tc_tr[MAXSYM];
 
111
/* tc_tr[] masked with 31 */
uchar tc_tr1[MAXSYM];
 
112
/* hash table, zeroed by tc_prepf() before tc_f_prep() runs */
int   tc_HASH[MAXHASH];
 
113
int   tc_Hash2[max_num];
 
114
uchar *tc_PatPtr[max_num];
 
115
/* heap buffer holding the compressed patterns; reallocated on every tc_prepf() call */
uchar *tc_pat_spool = NULL; /* [MAXPATFILE+2*max_num+MAXPAT]; */
 
116
/* tc_patt[i] points at the i-th compressed pattern inside tc_pat_spool; indices start at 1 */
uchar *tc_patt[max_num];
 
117
/* tc_pat_len[i] is the length of tc_patt[i] */
int   tc_pat_len[max_num];
 
118
int   tc_pat_indices[max_num]; /* pat_indices[p] gives the actual index in amatched_terminals: used only with AParse != 0 */
 
119
int tc_num_pat; /* must be the same as num_pat */
 
120
#endif  /*DOTCOMPRESSED*/
 
121
 
 
122
static void f_prep();
 
123
static void f_prep1();
 
124
static void accumulate();
 
125
#if     DOTCOMPRESSED
 
126
static void tc_f_prep();
 
127
static void tc_f_prep1();
 
128
static void tc_accumulate();
 
129
#endif
 
130
 
 
131
#ifdef perf_check
 
132
        int cshift=0, cshift0=0, chash=0;
 
133
#endif
 
134
 
 
135
/*
 
136
 * General idea behind output processing with delimiters, inverse, compression, etc.
 
137
 * CAUTION: In compressed files, we can search ONLY for simple patterns or their ;,.
 
138
 * Attempts to search for complex patterns / with errors might lead to spurious matches.
 
139
 * 1. Once we find the match, go back and forward to get the delimiters that surround
 
140
 *    the matched region.
 
141
 * 2. If it is a compressed file, verify that the match is "real" (compressed files
 
142
 *    can have pseudo matches hence this filtering step is required).
 
143
 * 3. Increment num_of_matched.
 
144
 * 4. Process some output options which print stuff before the matched region is
 
145
 *    printed.
 
146
 * 5. If there is compression, decompress and output the matched region. Otherwise
 
147
 *    just output it as is. Remember, from step (1) we know the matched region.
 
148
 * 6. If inverse is set, then we must keep track of the end of the last matched region
 
149
 *    in the variable lastout. When there is a match, we must print everything from
 
150
 *    lastout to the beginning of the current matched region (curtextbegin) and then
 
151
 *    update lastout to point to the end of the current matched region (curtextend).
 
152
 *    ALSO: if we exit from the main loops, we must output everything from the end
 
153
 *    of the last matched region to the end of the input buffer.
 
154
 * 7. Delimiter handling in complex patterns is different: there the search is done
 
155
 *    for a boolean and of the delimiter pattern and the actual pattern.
 
156
 * 8. For convenience and speed, the multipattern matching routines to handle
 
157
 *    compressed files have been separated from their (normal) counterparts.
 
158
 * 9. One special note on handling complicated boolean patterns: the parse
 
159
 *    tree will be the same for both compressed and uncompressed patterns and the
 
160
 *    same amatched_terminals array will be used in both. BUT, the pat_spool and
 
161
 *    pat_index, etc., will be different as they refer to the individual terminals.
 
162
 */
 
163
 
 
164
int
 
165
prepf(mfp, mbuf, mlen)
 
166
int mfp, mlen;
 
167
unsigned char *mbuf;
 
168
{
 
169
        int length=0, i, p=1;
 
170
        uchar *pat_ptr;
 
171
        unsigned Mask = 31;
 
172
        int num_read;
 
173
        unsigned char *buf;
 
174
        struct stat stbuf;
 
175
        int j, k;       /* to implement \\ */
 
176
 
 
177
        if ((mfp == -1) && ((mbuf == NULL) || (mlen <= 0))) return -1;
 
178
 
 
179
        if (mfp != -1) {
 
180
                if (fstat(mfp, &stbuf) == -1) {
 
181
                        fprintf(stderr, "%s: cannot stat file: %s\n", Progname, PAT_FILE_NAME);
 
182
                        return -1;
 
183
                }
 
184
                if (!S_ISREG(stbuf.st_mode)) {
 
185
                        fprintf(stderr, "%s: pattern file not regular file: %s\n", Progname, PAT_FILE_NAME);
 
186
                        return -1;
 
187
                }
 
188
                if (stbuf.st_size*2 > MAXPATFILE + 2*max_num) {
 
189
                        fprintf(stderr, "%s: pattern file too large (> %d B): %s\n", Progname, (MAXPATFILE+2*max_num)/2, PAT_FILE_NAME);
 
190
                        return -1;
 
191
                }
 
192
                if (pat_spool != NULL) free(pat_spool);
 
193
                pat_ptr = pat_spool = (unsigned char *)malloc(stbuf.st_size*2 + MAXPAT);
 
194
                alloc_buf(mfp, &buf, MAXPATFILE+2*BlockSize);
 
195
                while((num_read = fill_buf(mfp, buf+length, 2*BlockSize)) > 0) {
 
196
                        length = length + num_read;
 
197
                        if(length > MAXPATFILE) {
 
198
                                fprintf(stderr, "%s: maximum pattern file size is %d\n", Progname, MAXPATFILE);
 
199
                                if (!EXITONERROR) {
 
200
                                        errno = AGREP_ERROR;
 
201
                                        free_buf(mfp, buf);
 
202
                                        return -1;
 
203
                                }
 
204
                                else exit(2);
 
205
                        }
 
206
                }
 
207
        }
 
208
        else {
 
209
                buf = mbuf;
 
210
                length = mlen;
 
211
                if (mlen*2 > MAXPATFILE + 2*max_num) {
 
212
                        fprintf(stderr, "%s: pattern buffer too large (> %d B)\n", Progname, (MAXPATFILE+2*max_num)/2);
 
213
                        return -1;
 
214
                }
 
215
                if (pat_spool != NULL) free(pat_spool);
 
216
                pat_ptr = pat_spool = (unsigned char *)malloc(mlen*2 + MAXPAT);
 
217
        }
 
218
 
 
219
        /* Now all the patterns are in buf */
 
220
        buf[length] = '\n';
 
221
        i=0; p=1;
 
222
/* removed by Udi 11/8/94 - we now do WORDBOUND "by hand" 
 
223
        if(WORDBOUND) {
 
224
                while(i<length) {
 
225
                        patt[p] = pat_ptr;
 
226
                        *pat_ptr++ = W_DELIM;
 
227
                        while((i<length) && ((*pat_ptr = buf[i++]) != '\n')) pat_ptr++;
 
228
                        *pat_ptr++ = W_DELIM;
 
229
                        *pat_ptr++ = 0;
 
230
                        p++;
 
231
                }
 
232
        }
 
233
        else
 
234
*/
 
235
        if(WHOLELINE) {
 
236
                while(i<length) {
 
237
                        patt[p] = pat_ptr;
 
238
                        *pat_ptr++ = L_DELIM;
 
239
                        while((i<length) && ((*pat_ptr = buf[i++]) != '\n')) pat_ptr++;
 
240
                        *pat_ptr++ = L_DELIM;
 
241
                        *pat_ptr++ = 0;
 
242
                        p++;
 
243
                }
 
244
        }
 
245
        else {
 
246
                while(i < length) {
 
247
                        patt[p] = pat_ptr;
 
248
                        while((i<length) && ((*pat_ptr = buf[i++]) != '\n')) pat_ptr++;
 
249
                        *pat_ptr++ = 0;
 
250
                        p++;  
 
251
                }
 
252
        }
 
253
 
 
254
        /* Now, the patterns have been copied into patt[] */
 
255
        if(p>max_num) {
 
256
                fprintf(stderr, "%s: maximum number of patterns is %d\n", Progname, max_num); 
 
257
                if (!EXITONERROR) {
 
258
                        errno = AGREP_ERROR;
 
259
                        free_buf(mfp, buf);
 
260
                        return -1;
 
261
                }
 
262
                else exit(2);
 
263
 
 
264
        }
 
265
 
 
266
        for(i=1; i<20; i++) *pat_ptr = i;  /* boundary safety zone */
 
267
 
 
268
        /* I might have to keep changing tr s.t. mgrep won't get confused with W_DELIM */
 
269
        for(i=0; i< MAXSYM; i++) tr[i] = i;
 
270
        if(NOUPPER) {
 
271
                for (i=0; i<MAXSYM; i++)
 
272
                        if (isupper(i)) tr[i] = tr[tolower(i)];
 
273
                /* for(i='A'; i<= 'Z'; i++) tr[i] = i + 'a' - 'A'; */
 
274
        }
 
275
/*
 
276
        if(WORDBOUND) {
 
277
                for(i=1; i<MAXSYM; i++) if(!isalnum(i)) tr[i] = W_DELIM;
 
278
        }
 
279
removed by Udi 11/8/94 - the trick of using W-delim was too buggy.
 
280
we now do it "by hand" after we find a match 
 
281
*/
 
282
 
 
283
        for(i=0; i< MAXSYM; i++) tr1[i] = tr[i]&Mask;
 
284
        num_pat =  p-1;
 
285
        p_size  =  MAXPAT;
 
286
        for(i=1; i<=num_pat; i++) {
 
287
                p = strlen(patt[i]);
 
288
                if ((patt[i][0] == '^') || (patt[i][0] == '$')) patt[i][0] = '\n';
 
289
                if ((p > 1) && ((patt[i][p-1] == '^') || (patt[i][p-1] == '$')) && (patt[i][p-2] != '\\')) patt[i][p-1] = '\n';
 
290
 
 
291
                /* Added by bg, Dec 2nd 1994 */
 
292
                for (k=0; k<p; k++) {
 
293
                        if (patt[i][k] == '\\') {
 
294
                                for (j=k; j<p; j++)
 
295
                                        patt[i][j] = patt[i][j+1]; /* including '\0' */
 
296
                                p--;
 
297
                        }
 
298
                }
 
299
 
 
300
                pat_len[i] = p;
 
301
                /*
 
302
                pat_len[i] = (WORDBOUND?(p-2>0?p-2:1):p);  changed by Udi 11/8/94
 
303
                */
 
304
#ifdef  debug
 
305
                printf("prepf(): patt[%d]=%s, pat_len[%d]=%d\n", i, patt[i], i, pat_len[i]);
 
306
#endif
 
307
                if(p!=0 && p < p_size) p_size = p;      /* MIN */
 
308
        }
 
309
        if(p_size == 0) {
 
310
                fprintf(stderr, "%s: the pattern file is empty\n", Progname);
 
311
                if (!EXITONERROR) {
 
312
                        errno = AGREP_ERROR;
 
313
                        free_buf(mfp, buf);
 
314
                        return -1;
 
315
                }
 
316
                else exit(2);
 
317
        }
 
318
        if(length > 400 && p_size > 2) LONG = 1;
 
319
        if(p_size == 1) SHORT = 1;
 
320
        for(i=0; i<MAXMEMBER1; i++) SHIFT1[i] = p_size - 1 - LONG;
 
321
        for(i=0; i<MAXHASH; i++) {
 
322
                HASH[i] = 0;
 
323
        }
 
324
        for(i=1; i<=num_pat; i++) f_prep(i, patt[i]);
 
325
        accumulate();
 
326
        memset(pat_indices, '\0', sizeof(int) * (num_pat + 1));
 
327
        for(i=1; i<=num_pat; i++) f_prep1(i, patt[i]);
 
328
 
 
329
#if     DOTCOMPRESSED
 
330
        /* prepf for compression */
 
331
        if (-1 == tc_prepf(buf, length)) {
 
332
                free_buf(mfp, buf);
 
333
                return -1;
 
334
        }
 
335
#endif  /*DOTCOMPRESSED*/
 
336
        free_buf(mfp, buf);
 
337
        acompute_duplicates(aduplicates, aterminals, anum_terminals, tr);
 
338
        return 0;
 
339
}
 
340
 
 
341
#if     DOTCOMPRESSED
 
342
/*
 
343
 * Compression equivalent of prepf: called right after prepf.
 
344
 * 1. Read patt and SHIFT1
 
345
 * 2. Call tcompress on the patterns in patt and put in tc_patt.
 
346
 * 3. Use these patterns to compute tc_SHIFT (ignore WDELIM, LDELIM, case sensitivity, etc.)
 
347
 * 4. Process other variables/functions (pat_spool, tr, tr1, pat_len, accumulate, SHIFT1, f_prep, f_prep1, pat_indices) appropriately.
 
348
 */
 
349
int
 
350
tc_prepf(buf, length)
 
351
unsigned char *buf;
 
352
int     length;
 
353
{
 
354
        int i, p=1;
 
355
        uchar *pat_ptr;
 
356
        unsigned Mask = 31;
 
357
        int tc_length;
 
358
        unsigned char tc_buf[MAXPAT * 2];       /* maximum length of the compressed pattern */
 
359
        static struct timeval initt, finalt;
 
360
 
 
361
        if (length*2 > MAXPATFILE + 2*max_num) {
 
362
                fprintf(stderr, "%s: pattern buffer too large (> %d B)\n", Progname, (MAXPATFILE+2*max_num)/2);
 
363
                return -1;
 
364
        }
 
365
        if (tc_pat_spool != NULL) free(tc_pat_spool);
 
366
        pat_ptr = tc_pat_spool = (unsigned char *)malloc(length*2 + MAXPAT);
 
367
 
 
368
#if     MEASURE_TIMES
 
369
        gettimeofday(&initt, NULL);
 
370
#endif  /*MEASURE_TIMES*/
 
371
 
 
372
        i=0; p=1;
 
373
        while(i < length) {
 
374
                tc_patt[p] = pat_ptr;
 
375
                while((*pat_ptr = buf[i++]) != '\n') pat_ptr++;
 
376
                *pat_ptr++ = 0;
 
377
                if ((tc_length = quick_tcompress(FREQ_FILE, HASH_FILE, tc_patt[p], strlen(tc_patt[p]), tc_buf, MAXPAT * 2 - 8, TC_EASYSEARCH)) > 0) {
 
378
                        memcpy(tc_patt[p], tc_buf, tc_length);
 
379
                        tc_patt[p][tc_length] = '\0';
 
380
                        pat_ptr = tc_patt[p] + tc_length + 1;   /* character after '\0' */
 
381
                }
 
382
                p++;  
 
383
        }
 
384
 
 
385
        for(i=1; i<20; i++) *pat_ptr = i;  /* boundary safety zone */
 
386
 
 
387
        /* Ignore all other options: it is automatically W_DELIM */
 
388
        for(i=0; i< MAXSYM; i++) tc_tr[i] = i;
 
389
        for(i=0; i< MAXSYM; i++) tc_tr1[i] = tc_tr[i]&Mask;
 
390
        tc_num_pat =  p-1;
 
391
        tc_p_size  =  MAXPAT;
 
392
        for(i=1; i<=num_pat; i++) {
 
393
                p = strlen(tc_patt[i]);
 
394
                tc_pat_len[i] = p;
 
395
#ifdef  debug
 
396
                printf("prepf(): tc_patt[%d]=%s, tc_pat_len[%d]=%d\n", i, tc_patt[i], i, tc_pat_len[i]);
 
397
#endif
 
398
                if(p!=0 && p < tc_p_size) tc_p_size = p;        /* MIN */
 
399
        }
 
400
        if(tc_p_size == 0) {    /* cannot happen NOW */
 
401
                fprintf(stderr, "%s: the pattern file is empty\n", Progname);
 
402
                if (!EXITONERROR) {
 
403
                        errno = AGREP_ERROR;
 
404
                        return -1;
 
405
                }
 
406
                else exit(2);
 
407
        }
 
408
        if(length > 400 && tc_p_size > 2) tc_LONG = 1;
 
409
        if(tc_p_size == 1) tc_SHORT = 1;
 
410
        for(i=0; i<MAXMEMBER1; i++) tc_SHIFT1[i] = tc_p_size - 1 - LONG;
 
411
        for(i=0; i<MAXHASH; i++) {
 
412
                tc_HASH[i] = 0;
 
413
        }
 
414
        for(i=1; i<=tc_num_pat; i++) tc_f_prep(i, tc_patt[i]);
 
415
        tc_accumulate();
 
416
        memset(tc_pat_indices, '\0', sizeof(int) * (tc_num_pat + 1));
 
417
        for(i=1; i<=tc_num_pat; i++) tc_f_prep1(i, tc_patt[i]);
 
418
 
 
419
        acompute_duplicates(tc_aduplicates, aterminals, anum_terminals, tc_tr);
 
420
#if     MEASURE_TIMES
 
421
        gettimeofday(&finalt, NULL);
 
422
        INFILTER_ms +=  (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
 
423
#endif  /*MEASURE_TIMES*/
 
424
        return 0;
 
425
}
 
426
#endif  /*DOTCOMPRESSED*/
 
427
 
 
428
int
 
429
mgrep(fd)
 
430
int fd;
 
431
 
432
        register char r_newline = '\n';
 
433
        unsigned char *text;
 
434
        register int buf_end, num_read, start, end, residue = 0;
 
435
        int     oldCurrentByteOffset;
 
436
        int     first_time = 1;
 
437
 
 
438
#if     AGREP_POINTER
 
439
        if (fd != -1) {
 
440
#endif  /*AGREP_POINTER*/
 
441
                alloc_buf(fd, &text, 2*BlockSize+Max_record);
 
442
                text[Max_record-1] = '\n';  /* initial case */
 
443
                start = Max_record;
 
444
 
 
445
                while( (num_read = fill_buf(fd, text+Max_record, 2*BlockSize)) > 0) 
 
446
                {
 
447
                        buf_end = end = Max_record + num_read -1 ;
 
448
                        oldCurrentByteOffset = CurrentByteOffset;
 
449
 
 
450
                        if (first_time) {
 
451
                                if ((TCOMPRESSED == ON) && tuncompressible(text+Max_record, num_read)) {
 
452
                                        EASYSEARCH = text[Max_record+SIGNATURE_LEN-1];
 
453
                                        start += SIGNATURE_LEN;
 
454
                                        CurrentByteOffset += SIGNATURE_LEN;
 
455
                                        if (!EASYSEARCH) {
 
456
                                                fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);
 
457
                                        }
 
458
                                }
 
459
                                else TCOMPRESSED = OFF;
 
460
                                first_time = 0;
 
461
                        }
 
462
 
 
463
                        if (!DELIMITER) {
 
464
                                while(text[end]  != r_newline && end > Max_record) end--;
 
465
                                text[start-1] = r_newline;
 
466
                        }
 
467
                        else {
 
468
                                unsigned char *newbuf = text + end + 1;
 
469
                                newbuf = backward_delimiter(newbuf, text+Max_record, D_pattern, D_length, OUTTAIL);     /* see agrep.c/'d' */
 
470
                                if (newbuf < text+Max_record+D_length) newbuf = text + end + 1;
 
471
                                end = newbuf - text - 1;
 
472
                                memcpy(text+start-D_length, D_pattern, D_length);
 
473
                        }
 
474
                        residue = buf_end - end  + 1 ;
 
475
                        if(INVERSE && COUNT) countline(text+Max_record, num_read);
 
476
 
 
477
                        /* MGREP_PROCESS */
 
478
                        if (TCOMPRESSED) {      /* separate functions since separate globals => too many if-statements within a single function makes it slow */
 
479
#if     DOTCOMPRESSED
 
480
                                if(tc_SHORT) { if (-1 == tc_m_short(text, start, end)) {free_buf(fd, text); return -1;}}
 
481
                                else      { if (-1 == tc_monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
 
482
#endif  /*DOTCOMPRESSED*/
 
483
                        }
 
484
                        else {
 
485
                                if(SHORT) { if (-1 == m_short(text, start, end)) {free_buf(fd, text); return -1;}}
 
486
                                else      { if (-1 == monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
 
487
                        }
 
488
                        if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
 
489
                                if (agrep_finalfp != NULL)
 
490
                                        fprintf(agrep_finalfp, "%s", CurrentFileName);
 
491
                                else {
 
492
                                        int outindex;
 
493
                                        for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
494
                                                        (CurrentFileName[outindex] != '\0'); outindex++) {
 
495
                                                agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
 
496
                                        }
 
497
                                        if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
 
498
                                                OUTPUT_OVERFLOW;
 
499
                                                free_buf(fd, text);
 
500
                                                return -1;
 
501
                                        }
 
502
                                        agrep_outpointer += outindex;
 
503
                                }
 
504
                                if (PRINTFILETIME) {
 
505
                                        char *s = aprint_file_time(CurrentFileTime);
 
506
                                        if (agrep_finalfp != NULL)
 
507
                                                fprintf(agrep_finalfp, "%s", s);
 
508
                                        else {
 
509
                                                int outindex;
 
510
                                                for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
511
                                                                (s[outindex] != '\0'); outindex++) {
 
512
                                                        agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
 
513
                                                }
 
514
                                                if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
 
515
                                                        OUTPUT_OVERFLOW;
 
516
                                                        free_buf(fd, text);
 
517
                                                        return -1;
 
518
                                                }
 
519
                                                agrep_outpointer += outindex;
 
520
                                        }
 
521
                                }
 
522
                                if (agrep_finalfp != NULL)
 
523
                                        fprintf(agrep_finalfp, "\n");
 
524
                                else {
 
525
                                        if (agrep_outpointer+1>=agrep_outlen) {
 
526
                                                OUTPUT_OVERFLOW;
 
527
                                                free_buf(fd, text);
 
528
                                                return -1;
 
529
                                        }
 
530
                                        else agrep_outbuffer[agrep_outpointer++] = '\n';
 
531
                                }
 
532
 
 
533
                                free_buf(fd, text);
 
534
                                NEW_FILE = OFF;
 
535
                                return 0;
 
536
                        }
 
537
 
 
538
                        CurrentByteOffset = oldCurrentByteOffset + end - start + 1;
 
539
                        start = Max_record - residue;
 
540
                        if(start < 0) {
 
541
                                start = 1; 
 
542
                        }
 
543
                        strncpy(text+start, text+end, residue);
 
544
 
 
545
                        if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
 
546
                            ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
 
547
                                free_buf(fd, text);
 
548
                                return 0;       /* done */
 
549
                        }
 
550
                } /* end of while(num_read = ... */
 
551
                if (!DELIMITER) {
 
552
                        text[start-1] = '\n';
 
553
                        text[start+residue] = '\n';
 
554
                }
 
555
                else {
 
556
                        if (start > D_length) memcpy(text+start-D_length, D_pattern, D_length);
 
557
                        memcpy(text+start+residue, D_pattern, D_length);
 
558
                }
 
559
                end = start + residue;
 
560
                if(residue > 1) {
 
561
                        if (TCOMPRESSED) {
 
562
#if     DOTCOMPRESSED
 
563
                                if(tc_SHORT) tc_m_short(text, start, end);
 
564
                                else      tc_monkey1(text, start, end);
 
565
#endif  /*DOTCOMPRESSED*/
 
566
                        }
 
567
                        else {
 
568
                                if(SHORT) m_short(text, start, end);
 
569
                                else      monkey1(text, start, end);
 
570
                        }
 
571
                        if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
 
572
                                if (agrep_finalfp != NULL)
 
573
                                        fprintf(agrep_finalfp, "%s", CurrentFileName);
 
574
                                else {
 
575
                                        int outindex;
 
576
                                        for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
577
                                                        (CurrentFileName[outindex] != '\0'); outindex++) {
 
578
                                                agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
 
579
                                        }
 
580
                                        if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
 
581
                                                OUTPUT_OVERFLOW;
 
582
                                                free_buf(fd, text);
 
583
                                                return -1;
 
584
                                        }
 
585
                                        agrep_outpointer += outindex;
 
586
                                }
 
587
                                if (PRINTFILETIME) {
 
588
                                        char *s = aprint_file_time(CurrentFileTime);
 
589
                                        if (agrep_finalfp != NULL)
 
590
                                                fprintf(agrep_finalfp, "%s", s);
 
591
                                        else {
 
592
                                                int outindex;
 
593
                                                for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
594
                                                                (s[outindex] != '\0'); outindex++) {
 
595
                                                        agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
 
596
                                                }
 
597
                                                if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
 
598
                                                        OUTPUT_OVERFLOW;
 
599
                                                        free_buf(fd, text);
 
600
                                                        return -1;
 
601
                                                }
 
602
                                                agrep_outpointer += outindex;
 
603
                                        }
 
604
                                }
 
605
                                if (agrep_finalfp != NULL)
 
606
                                        fprintf(agrep_finalfp, "\n");
 
607
                                else {
 
608
                                        if (agrep_outpointer+1>=agrep_outlen) {
 
609
                                                OUTPUT_OVERFLOW;
 
610
                                                free_buf(fd, text);
 
611
                                                return -1;
 
612
                                        }
 
613
                                        else agrep_outbuffer[agrep_outpointer++] = '\n';
 
614
                                }
 
615
 
 
616
                                free_buf(fd, text);
 
617
                                NEW_FILE = OFF;
 
618
                                return 0;
 
619
                        }
 
620
                }
 
621
                free_buf(fd, text);
 
622
                return (0);
 
623
#if     AGREP_POINTER
 
624
        }
 
625
        else {
 
626
                text = (unsigned char *)agrep_inbuffer;
 
627
                num_read = agrep_inlen;
 
628
                start = 0;
 
629
                buf_end = end = num_read - 1;
 
630
 
 
631
                        oldCurrentByteOffset = CurrentByteOffset;
 
632
 
 
633
                        if (first_time) {
 
634
                                if ((TCOMPRESSED == ON) && tuncompressible(text+Max_record, num_read)) {
 
635
                                        EASYSEARCH = text[Max_record+SIGNATURE_LEN-1];
 
636
                                        start += SIGNATURE_LEN;
 
637
                                        CurrentByteOffset += SIGNATURE_LEN;
 
638
                                        if (!EASYSEARCH) {
 
639
                                                fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);
 
640
                                        }
 
641
                                }
 
642
                                else TCOMPRESSED = OFF;
 
643
                                first_time = 0;
 
644
                        }
 
645
 
 
646
                        if (!DELIMITER)
 
647
                                while(text[end]  != r_newline && end > 1) end--;
 
648
                        else {
 
649
                                unsigned char *newbuf = text + end + 1;
 
650
                                newbuf = backward_delimiter(newbuf, text, D_pattern, D_length, OUTTAIL);        /* see agrep.c/'d' */
 
651
                                if (newbuf < text+D_length) newbuf = text + end + 1;
 
652
                                end = newbuf - text - 1;
 
653
                        }
 
654
                        /* text[0] = text[end] = r_newline; : the user must ensure that the delimiter is there at text[0] and occurs somewhere before text[end] */
 
655
 
 
656
                        if (INVERSE && COUNT) countline(text, num_read);
 
657
 
 
658
                        /* An exact copy of the above MGREP_PROCESS */
 
659
                        if (TCOMPRESSED) {      /* separate functions since separate globals => too many if-statements within a single function makes it slow */
 
660
#if     DOTCOMPRESSED
 
661
                                if(tc_SHORT) { if (-1 == tc_m_short(text, start, end)) {free_buf(fd, text); return -1;}}
 
662
                                else      { if (-1 == tc_monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
 
663
#endif  /*DOTCOMPRESSED*/
 
664
                        }
 
665
                        else {
 
666
                                if(SHORT) { if (-1 == m_short(text, start, end)) {free_buf(fd, text); return -1;}}
 
667
                                else      { if (-1 == monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
 
668
                        }
 
669
                        if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
 
670
                                if (agrep_finalfp != NULL)
 
671
                                        fprintf(agrep_finalfp, "%s", CurrentFileName);
 
672
                                else {
 
673
                                        int outindex;
 
674
                                        for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
675
                                                        (CurrentFileName[outindex] != '\0'); outindex++) {
 
676
                                                agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
 
677
                                        }
 
678
                                        if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
 
679
                                                OUTPUT_OVERFLOW;
 
680
                                                free_buf(fd, text);
 
681
                                                return -1;
 
682
                                        }
 
683
                                        agrep_outpointer += outindex;
 
684
                                }
 
685
                                if (PRINTFILETIME) {
 
686
                                        char *s = aprint_file_time(CurrentFileTime);
 
687
                                        if (agrep_finalfp != NULL)
 
688
                                                fprintf(agrep_finalfp, "%s", s);
 
689
                                        else {
 
690
                                                int outindex;
 
691
                                                for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
692
                                                                (s[outindex] != '\0'); outindex++) {
 
693
                                                        agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
 
694
                                                }
 
695
                                                if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
 
696
                                                        OUTPUT_OVERFLOW;
 
697
                                                        free_buf(fd, text);
 
698
                                                        return -1;
 
699
                                                }
 
700
                                                agrep_outpointer += outindex;
 
701
                                        }
 
702
                                }
 
703
                                if (agrep_finalfp != NULL)
 
704
                                        fprintf(agrep_finalfp, "\n");
 
705
                                else {
 
706
                                        if (agrep_outpointer+1>=agrep_outlen) {
 
707
                                                OUTPUT_OVERFLOW;
 
708
                                                free_buf(fd, text);
 
709
                                                return -1;
 
710
                                        }
 
711
                                        else agrep_outbuffer[agrep_outpointer++] = '\n';
 
712
                                }
 
713
 
 
714
                                free_buf(fd, text);
 
715
                                NEW_FILE = OFF;
 
716
                                return 0;
 
717
                        }
 
718
 
 
719
                return 0;
 
720
        }
 
721
#endif  /*AGREP_POINTER*/
 
722
#ifdef perf_check
 
723
        fprintf(stderr,"Shifted %d times; shift=0 %d times; hash was = %d times\n",cshift, cshift0, chash);
 
724
        return 0;
 
725
#endif
 
726
} /* end mgrep */
 
727
 
 
728
static void
 
729
countline(text, len)
 
730
unsigned char *text; int len;
 
731
{
 
732
int i;
 
733
        for (i=0; i<len; i++) if(text[i] == '\n') total_line++;
 
734
}
 
735
 
 
736
/* Stuff that always needs to be printed whenever there is a match in all functions in this file */
 
737
int
 
738
print_options(pat_index, text, curtextbegin, curtextend)
 
739
        int     pat_index;
 
740
        unsigned char   *text, *curtextbegin, *curtextend;
 
741
{
 
742
        int     PRINTED = 0;
 
743
        if (SILENT) return 0;
 
744
        if(FNAME && (NEW_FILE || !POST_FILTER)) {
 
745
                char    nextchar = (POST_FILTER == ON)?'\n':' ';
 
746
                char    *prevstring = (POST_FILTER == ON)?"\n":"";
 
747
 
 
748
                if (agrep_finalfp != NULL)
 
749
                        fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName);
 
750
                else {
 
751
                        int outindex;
 
752
                        if (prevstring[0] != '\0') {
 
753
                                if(agrep_outpointer + 1 >= agrep_outlen) {
 
754
                                        OUTPUT_OVERFLOW;
 
755
                                        return -1;
 
756
                                }
 
757
                                else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
 
758
                        }
 
759
                        for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
760
                                        (CurrentFileName[outindex] != '\0'); outindex++) {
 
761
                                agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
 
762
                        }
 
763
                        if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
 
764
                                OUTPUT_OVERFLOW;
 
765
                                return -1;
 
766
                        }
 
767
                        agrep_outpointer += outindex;
 
768
                }
 
769
                if (PRINTFILETIME) {
 
770
                        char *s = aprint_file_time(CurrentFileTime);
 
771
                        if (agrep_finalfp != NULL)
 
772
                                fprintf(agrep_finalfp, "%s", s);
 
773
                        else {
 
774
                                int outindex;
 
775
                                for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
776
                                                (s[outindex] != '\0'); outindex++) {
 
777
                                        agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
 
778
                                }
 
779
                                if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
 
780
                                        OUTPUT_OVERFLOW;
 
781
                                        return -1;
 
782
                                }
 
783
                                agrep_outpointer += outindex;
 
784
                        }
 
785
                }
 
786
                if (agrep_finalfp != NULL)
 
787
                        fprintf(agrep_finalfp, ":%c", nextchar);
 
788
                else {
 
789
                        if (agrep_outpointer+2>= agrep_outlen) {
 
790
                                OUTPUT_OVERFLOW;
 
791
                                return -1;
 
792
                        }
 
793
                        else {
 
794
                                agrep_outbuffer[agrep_outpointer++] = ':';
 
795
                                agrep_outbuffer[agrep_outpointer++] = nextchar;
 
796
                        }
 
797
                }
 
798
 
 
799
                NEW_FILE = OFF;
 
800
                PRINTED = 1;
 
801
        }
 
802
 
 
803
        if (PRINTPATTERN) {
 
804
                if (agrep_finalfp != NULL)
 
805
                        fprintf(agrep_finalfp, "%d- ", pat_index);
 
806
                else {
 
807
                        char s[32];
 
808
                        int outindex;
 
809
                        sprintf(s, "%d- ", pat_index);
 
810
                        for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
811
                                        (s[outindex] != '\0'); outindex++) {
 
812
                                agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
 
813
                        }
 
814
                        if (s[outindex] != '\0') {
 
815
                                OUTPUT_OVERFLOW;
 
816
                                return -1;
 
817
                        }
 
818
                        agrep_outpointer += outindex;
 
819
                }
 
820
                PRINTED = 1;
 
821
        }
 
822
 
 
823
        if (BYTECOUNT) {
 
824
                if (agrep_finalfp != NULL)
 
825
                        fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);
 
826
                else {
 
827
                        char s[32];
 
828
                        int outindex;
 
829
                        sprintf(s, "%d= ", CurrentByteOffset);
 
830
                        for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
831
                                        (s[outindex] != '\0'); outindex++) {
 
832
                                agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
 
833
                        }
 
834
                        if (s[outindex] != '\0') {
 
835
                                OUTPUT_OVERFLOW;
 
836
                                return -1;
 
837
                        }
 
838
                        agrep_outpointer += outindex;
 
839
                }
 
840
                PRINTED = 1;
 
841
        }
 
842
 
 
843
        if (PRINTOFFSET) {
 
844
                if (agrep_finalfp != NULL)
 
845
                        fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
 
846
                else {
 
847
                        char s[32];
 
848
                        int outindex;
 
849
                        sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
 
850
                        for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
 
851
                                         (s[outindex] != '\0'); outindex ++) {
 
852
                                agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
 
853
                        }
 
854
                        if (s[outindex] != '\0') {
 
855
                                OUTPUT_OVERFLOW;
 
856
                                return -1;
 
857
                        }
 
858
                        agrep_outpointer += outindex;
 
859
                }
 
860
                PRINTED = 1;
 
861
        }
 
862
        return PRINTED;
 
863
}
 
864
 
 
865
int
 
866
monkey1( text, start, end  ) 
 
867
int start, end; register unsigned char *text;
 
868
{
 
869
        int PRINTED = 0;
 
870
        int num=0;
 
871
        unsigned char *oldtext;
 
872
        int pat_index;
 
873
        register uchar *textend;
 
874
        unsigned char *textbegin;
 
875
        unsigned char *curtextend;
 
876
        unsigned char *curtextbegin;
 
877
        register unsigned hash;
 
878
        register uchar shift;
 
879
        register int  m1, Long=LONG;
 
880
        int MATCHED=0;
 
881
        register uchar *qx;
 
882
        register uchar *px;
 
883
        register int p, p_end;
 
884
        uchar *lastout;
 
885
        /* int OUT=0; */
 
886
        int hash2;
 
887
        int j;
 
888
        int DOWITHMASK;
 
889
 
 
890
        DOWITHMASK = 0;
 
891
        if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
892
        textbegin = text + start;
 
893
        textend = text + end;
 
894
        m1 = p_size-1;
 
895
        lastout = text+start;
 
896
        text = text + start + m1 -1 ;
 
897
        /* -1 to allow match to the first \n in case the pattern has ^ in front of it */
 
898
/*
 
899
        if (WORDBOUND || WHOLELINE) text = text-1;
 
900
        if (WHOLELINE) text = text-1;
 
901
*/
 
902
                /* to accomodate the extra 2 W_delim */
 
903
        while (text <= textend) {
 
904
                hash=tr1[*text];
 
905
                hash=(hash<<Hbits)+(tr1[*(text-1)]);
 
906
                if(Long) hash=(hash<<Hbits)+(tr1[*(text-2)]);
 
907
                shift = SHIFT1[hash];
 
908
#ifdef perf_check
 
909
                cshift++;
 
910
#endif
 
911
                if(shift == 0) {
 
912
                        hash=hash&mask5;
 
913
                        hash2 = (tr[*(text-m1)]<<8) + tr[*(text-m1+1)];
 
914
                        p = HASH[hash];
 
915
#ifdef perf_check
 
916
                        cshift0++;
 
917
#endif
 
918
                        p_end = HASH[hash+1];
 
919
#ifdef debug
 
920
                        printf("hash=%d, p=%d, p_end=%d\n", hash, p, p_end);
 
921
#endif
 
922
                        while(p++ < p_end) {
 
923
                                if(hash2 != Hash2[p]) continue;
 
924
#ifdef perf_check
 
925
                                chash++;
 
926
#endif
 
927
                                if (((pat_index = pat_indices[p]) <= 0) || (pat_len[pat_index] <= 0)) continue;
 
928
                                px = PatPtr[p];
 
929
                                qx = text-m1;
 
930
                                while((*px!=0)&&(tr[*px] == tr[*qx])) {
 
931
                                        px++;
 
932
                                        qx++;
 
933
                                }
 
934
                                if (*px == 0) {
 
935
                                        if(text > textend) return 0;
 
936
                                        if (WORDBOUND) {
 
937
                                                if (isalnum(*(unsigned char *)qx)) goto skip_output;
 
938
                                                if (isalnum(*(unsigned char *)(text-m1-1))) goto skip_output;
 
939
                                        }
 
940
                                        if (!DOWITHMASK) {
 
941
                                                /* Don't update CurrentByteOffset here: only before outputting properly */
 
942
                                                if (!DELIMITER) {
 
943
                                                        curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
 
944
                                                        if (*curtextbegin == '\n') curtextbegin ++;
 
945
                                                        curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
 
946
                                                        if (*curtextend == '\n') curtextend ++;
 
947
                                                }
 
948
                                                else {
 
949
                                                        curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);
 
950
                                                        curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, D_pattern, D_length, OUTTAIL);
 
951
                                                }
 
952
                                                if (!OUTTAIL || INVERSE) textbegin = curtextend;
 
953
                                                else if (DELIMITER) textbegin = curtextend - D_length;
 
954
                                                else textbegin = curtextend - 1;
 
955
                                        }
 
956
 
 
957
                                        DOWITHMASK = 1;
 
958
                                        if (pat_index <= anum_terminals) {
 
959
                                                int     iii;
 
960
                                                amatched_terminals[pat_index - 1] = 1;
 
961
                                                for (iii=0; iii<anum_terminals; iii++)
 
962
                                                        if (aduplicates[pat_index - 1][iii])
 
963
                                                                amatched_terminals[iii] = 1;
 
964
                                        }
 
965
                                        if (AComplexBoolean) {
 
966
                                                /* Can output only after all the matches in the current record have been identified: just like filter_output */
 
967
                                                oldtext = text;
 
968
                                                CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);
 
969
                                                text = oldtext + pat_len[pat_index] - 1;
 
970
                                                MATCHED = 0;
 
971
                                                goto skip_output;
 
972
                                        }
 
973
                                        else if ((long)AParse & AND_EXP) {
 
974
                                                for (j=0; j<anum_terminals; j++) if (!amatched_terminals[j]) break;
 
975
                                                if (j<anum_terminals) goto skip_output;
 
976
                                        }
 
977
                                        MATCHED=1;
 
978
                                        oldtext = text; /* only for MULTI_OUTPUT */
 
979
 
 
980
#undef  DO_OUTPUT
 
981
#define DO_OUTPUT(change_text)\
 
982
                                        num_of_matched++;\
 
983
                                        if(FILENAMEONLY || SILENT)  return 0;\
 
984
                                        if (!COUNT) {\
 
985
num ++;\
 
986
                                                if ((PRINTED = print_options(pat_index, text, curtextbegin, curtextend)) == -1) return -1;\
 
987
                                                if(!INVERSE) {\
 
988
                                                        if (PRINTRECORD) {\
 
989
                                                        if (agrep_finalfp != NULL) {\
 
990
                                                                fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);\
 
991
                                                        }\
 
992
                                                        else {\
 
993
                                                                if (agrep_outpointer + curtextend - curtextbegin>= agrep_outlen) {\
 
994
                                                                        OUTPUT_OVERFLOW;\
 
995
                                                                        return -1;\
 
996
                                                                }\
 
997
                                                                else {\
 
998
                                                                        memcpy(agrep_outbuffer + agrep_outpointer, curtextbegin, curtextend-curtextbegin);\
 
999
                                                                        agrep_outpointer += curtextend - curtextbegin;\
 
1000
                                                                }\
 
1001
                                                        }\
 
1002
                                                        }\
 
1003
                                                        else if (PRINTED) {\
 
1004
                                                                if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\
 
1005
                                                                else agrep_outbuffer[agrep_outpointer ++] = '\n';\
 
1006
                                                                PRINTED = 0;\
 
1007
                                                        }\
 
1008
                                                        if ((change_text) && MULTI_OUTPUT) {     /* next match starting from end of current */\
 
1009
                                                                CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);\
 
1010
                                                                text = oldtext + pat_len[pat_index] - 1;\
 
1011
                                                                MATCHED = 0;\
 
1012
                                                        }\
 
1013
                                                        else if (change_text) {\
 
1014
                                                                CurrentByteOffset += textbegin - text;\
 
1015
                                                                text = textbegin;\
 
1016
                                                        }\
 
1017
                                                }\
 
1018
                                                else {  /* INVERSE */\
 
1019
                                                        /* if(lastout < curtextbegin) OUT=1; */\
 
1020
                                                        if (!SILENT) {\
 
1021
                                                        if (agrep_finalfp != NULL)\
 
1022
                                                                fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);\
 
1023
                                                        else {\
 
1024
                                                                if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {\
 
1025
                                                                        OUTPUT_OVERFLOW;\
 
1026
                                                                        return -1;\
 
1027
                                                                }\
 
1028
                                                                memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);\
 
1029
                                                                agrep_outpointer += (curtextbegin-lastout);\
 
1030
                                                        }\
 
1031
                                                        }\
 
1032
                                                        lastout=textbegin;\
 
1033
                                                        if (change_text) {\
 
1034
                                                                CurrentByteOffset += textbegin - text;\
 
1035
                                                                text = textbegin;\
 
1036
                                                        }\
 
1037
                                                }\
 
1038
                                        }\
 
1039
                                        else if (change_text) { /* COUNT */\
 
1040
                                                CurrentByteOffset += textbegin - text;\
 
1041
                                                text = textbegin;\
 
1042
                                        }\
 
1043
                                        if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\
 
1044
                                            ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;   /* done */\
 
1045
 
 
1046
                                        DO_OUTPUT(1)
 
1047
                                }
 
1048
 
 
1049
                        skip_output:
 
1050
                                if (MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break;    /* else look for more possible matches since we never know how many will match */
 
1051
                                if (DOWITHMASK && (text >= curtextend - 1)) {
 
1052
                                        DOWITHMASK = 0;
 
1053
                                        if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1054
                                                DO_OUTPUT(0)
 
1055
                                        }
 
1056
                                        if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1057
                                }
 
1058
                        }
 
1059
                        /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */
 
1060
                        if (DOWITHMASK && (text >= curtextend - 1)) {
 
1061
                                DOWITHMASK = 0;
 
1062
                                if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1063
                                        DO_OUTPUT(0)
 
1064
                                }
 
1065
                                if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1066
                        }
 
1067
                        if(!MATCHED) shift = 1; /* || MULTI_OUTPUT is implicit */
 
1068
                        else {
 
1069
                                MATCHED = 0;
 
1070
                                shift = m1 - 1 > 0 ? m1 - 1 : 1;
 
1071
                        }
 
1072
                }
 
1073
 
 
1074
                /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */
 
1075
                if (DOWITHMASK && (text >= curtextend - 1)) {
 
1076
                        DOWITHMASK = 0;
 
1077
                        if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1078
                                DO_OUTPUT(0)
 
1079
                        }
 
1080
                        if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1081
                }
 
1082
 
 
1083
                text += shift;
 
1084
                CurrentByteOffset += shift;
 
1085
        }
 
1086
 
 
1087
        /* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */
 
1088
 
 
1089
        if (DOWITHMASK && (text >= curtextend - 1)) {
 
1090
                DOWITHMASK = 0;
 
1091
                if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1092
                        DO_OUTPUT(0)
 
1093
                }
 
1094
                if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1095
        }
 
1096
 
 
1097
        if(INVERSE && !COUNT && (lastout <= textend)) {
 
1098
                if (!SILENT) {
 
1099
                if (agrep_finalfp != NULL) {
 
1100
                        while(lastout <= textend) fputc(*lastout++, agrep_finalfp);
 
1101
                }
 
1102
                else {
 
1103
                        if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {
 
1104
                                OUTPUT_OVERFLOW;
 
1105
                                return -1;
 
1106
                        }
 
1107
                        memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout+1);
 
1108
                        agrep_outpointer += (textend-lastout+1);
 
1109
                        lastout = textend;
 
1110
                }
 
1111
                }
 
1112
        }
 
1113
 
 
1114
        return 0;
 
1115
}
 
1116
 
 
1117
#if     DOTCOMPRESSED
 
1118
int
 
1119
tc_monkey1( text, start, end  ) 
 
1120
int start, end;
 
1121
register unsigned char *text;
 
1122
{
 
1123
        int PRINTED = 0;
 
1124
        unsigned char *oldtext;
 
1125
        int pat_index;
 
1126
        register uchar *textend;
 
1127
        unsigned char *textbegin;
 
1128
        unsigned char *curtextend;
 
1129
        unsigned char *curtextbegin;
 
1130
        register unsigned hash;
 
1131
        register uchar shift;
 
1132
        register int  m1, Long=LONG;
 
1133
        int MATCHED=0;
 
1134
        register uchar *qx;
 
1135
        register uchar *px;
 
1136
        register int p, p_end;
 
1137
        uchar *lastout;
 
1138
        /* int OUT=0; */
 
1139
        int hash2;
 
1140
        int j;
 
1141
        int DOWITHMASK;
 
1142
        struct timeval initt, finalt;
 
1143
        int newlen;
 
1144
 
 
1145
        DOWITHMASK = 0;
 
1146
        if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1147
        textbegin = text + start;
 
1148
        textend = text + end;
 
1149
        m1 = tc_p_size-1;
 
1150
        lastout = text+start;
 
1151
        text = text + start + m1 -1;
 
1152
        /* -1 to allow match to the first \n in case the pattern has ^ in front of it */
 
1153
        /* WORDBOUND adjustment not required */
 
1154
        while (text <= textend) {
 
1155
                hash=tc_tr1[*text];
 
1156
                hash=(hash<<Hbits)+(tc_tr1[*(text-1)]);
 
1157
                if(Long) hash=(hash<<Hbits)+(tc_tr1[*(text-2)]);
 
1158
                shift = tc_SHIFT1[hash];
 
1159
#ifdef perf_check
 
1160
                cshift++;
 
1161
#endif
 
1162
                if(shift == 0) {
 
1163
                        hash=hash&mask5;
 
1164
                        hash2 = (tc_tr[*(text-m1)]<<8) + tc_tr[*(text-m1+1)];
 
1165
                        p = tc_HASH[hash];
 
1166
#ifdef perf_check
 
1167
                        cshift0++;
 
1168
#endif
 
1169
                        p_end = tc_HASH[hash+1];
 
1170
#ifdef debug
 
1171
                        printf("hash=%d, p=%d, p_end=%d\n", hash, p, p_end);
 
1172
#endif
 
1173
                        while(p++ < p_end) {
 
1174
                                if(hash2 != tc_Hash2[p]) continue;
 
1175
#ifdef perf_check
 
1176
                                chash++;
 
1177
#endif
 
1178
                                if (((pat_index = tc_pat_indices[p]) <= 0) || (tc_pat_len[pat_index] <= 0)) continue;
 
1179
                                px = tc_PatPtr[p];
 
1180
                                qx = text-m1;
 
1181
 
 
1182
                                while((*px!=0)&&(tc_tr[*px] == tc_tr[*qx])) {
 
1183
                                        px++;
 
1184
                                        qx++;
 
1185
                                }
 
1186
                                if (*px == 0) {
 
1187
                                        if(text > textend) return 0;
 
1188
                                        if (!DOWITHMASK) {
 
1189
                                                /* Don't update CurrentByteOffset here: only before outputting properly */
 
1190
                                                if (!DELIMITER) {
 
1191
                                                        curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
 
1192
                                                        if (*curtextbegin == '\n') curtextbegin ++;
 
1193
                                                        curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
 
1194
                                                        if (*curtextend == '\n') curtextend ++;
 
1195
                                                }
 
1196
                                                else {
 
1197
                                                        curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);
 
1198
                                                        curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, tc_D_pattern, tc_D_length, OUTTAIL);
 
1199
                                                }
 
1200
                                        }
 
1201
                                        /* else prev curtextbegin is OK: if full AND isn't found, DOWITHMASK is 0-ed so that we search at most 1 line below */
 
1202
#if     MEASURE_TIMES
 
1203
                                        gettimeofday(&initt, NULL);
 
1204
#endif  /*MEASURE_TIMES*/
 
1205
                                        /* Was it really a match in the compressed line from prev line in text to text + strlen(tc_pat_len[pat_index]? */
 
1206
                                        if (-1==exists_tcompressed_word(tc_PatPtr[p], tc_pat_len[pat_index], curtextbegin, text - curtextbegin + tc_pat_len[pat_index], EASYSEARCH))
 
1207
                                                goto skip_output;
 
1208
#if     MEASURE_TIMES
 
1209
                                        gettimeofday(&finalt, NULL);
 
1210
                                        FILTERALGO_ms +=  (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
 
1211
#endif  /*MEASURE_TIMES*/
 
1212
                                        if (!DOWITHMASK) {
 
1213
                                                if (!OUTTAIL || INVERSE) textbegin = curtextend;
 
1214
                                                else if (DELIMITER) textbegin = curtextend - D_length;
 
1215
                                                else textbegin = curtextend - 1;
 
1216
                                        }
 
1217
                                        DOWITHMASK = 1;
 
1218
                                        if (pat_index <= anum_terminals) {
 
1219
                                                int     iii;
 
1220
                                                amatched_terminals[pat_index - 1] = 1;
 
1221
                                                for (iii=0; iii<anum_terminals; iii++)
 
1222
                                                        if (aduplicates[pat_index - 1][iii])
 
1223
                                                                amatched_terminals[iii] = 1;
 
1224
                                        }
 
1225
                                        if (AComplexBoolean) {
 
1226
                                                /* Can output only after all the matches in the current record have been identified: just like filter_output */
 
1227
                                                oldtext = text;
 
1228
                                                CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);
 
1229
                                                text = oldtext + pat_len[pat_index] - 1;
 
1230
                                                MATCHED = 0;
 
1231
                                                goto skip_output;
 
1232
                                        }
 
1233
                                        else if ((long)AParse & AND_EXP) {
 
1234
                                                for (j=0; j<anum_terminals; j++) if (!amatched_terminals[j]) break;
 
1235
                                                if (j<anum_terminals) goto skip_output;
 
1236
                                        }
 
1237
 
 
1238
                                        MATCHED=1;
 
1239
                                        oldtext = text; /* only for MULTI_OUTPUT */
 
1240
 
 
1241
#undef  DO_OUTPUT
 
1242
#define DO_OUTPUT(change_text)\
 
1243
                                        num_of_matched++;\
 
1244
                                        if(FILENAMEONLY || SILENT)  return 0;\
 
1245
                                        if (!COUNT) {\
 
1246
                                                if ((PRINTED = print_options(pat_index, text, curtextbegin, curtextend)) == -1) return -1;\
 
1247
                                                if(!INVERSE) {\
 
1248
                                                        if (PRINTRECORD) {\
 
1249
/* #if     MEASURE_TIMES\
 
1250
                                                        gettimeofday(&initt, NULL);\
 
1251
*/ /*#endif  MEASURE_TIMES */\
 
1252
                                                        if (agrep_finalfp != NULL)\
 
1253
                                                                newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);\
 
1254
                                                        else {\
 
1255
                                                                if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {\
 
1256
                                                                        if (newlen + agrep_outpointer >= agrep_outlen) {\
 
1257
                                                                                OUTPUT_OVERFLOW;\
 
1258
                                                                                return -1;\
 
1259
                                                                        }\
 
1260
                                                                        agrep_outpointer += newlen;\
 
1261
                                                                }\
 
1262
                                                        }\
 
1263
/* #if     MEASURE_TIMES\
 
1264
                                                        gettimeofday(&finalt, NULL);\
 
1265
                                                        OUTFILTER_ms += (finalt.tv_sec* 1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);\
 
1266
*/ /*#endif  MEASURE_TIMES */\
 
1267
                                                        }\
 
1268
                                                        else if (PRINTED) {\
 
1269
                                                                if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\
 
1270
                                                                else agrep_outbuffer[agrep_outpointer ++] = '\n';\
 
1271
                                                                PRINTED = 0;\
 
1272
                                                        }\
 
1273
                                                        if ((change_text) && MULTI_OUTPUT) {     /* next match starting from end of current */\
 
1274
                                                                CurrentByteOffset += (oldtext + tc_pat_len[pat_index] - 1 - text);\
 
1275
                                                                text = oldtext + tc_pat_len[pat_index] - 1;\
 
1276
                                                                MATCHED = 0;\
 
1277
                                                        }\
 
1278
                                                        else if (change_text) {\
 
1279
                                                                CurrentByteOffset += textbegin - text;\
 
1280
                                                                text = textbegin;\
 
1281
                                                        }\
 
1282
                                                }\
 
1283
                                                else {  /* INVERSE: Don't care about filtering time */\
 
1284
                                                        /* if(lastout < curtextbegin) OUT=1; */\
 
1285
                                                        if (!SILENT) {\
 
1286
                                                        if (agrep_finalfp != NULL)\
 
1287
                                                                newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);\
 
1288
                                                        else {\
 
1289
                                                                if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {\
 
1290
                                                                        if (newlen + agrep_outpointer >= agrep_outlen) {\
 
1291
                                                                                OUTPUT_OVERFLOW;\
 
1292
                                                                                return -1;\
 
1293
                                                                        }\
 
1294
                                                                        agrep_outpointer += newlen;\
 
1295
                                                                }\
 
1296
                                                        }\
 
1297
                                                        }\
 
1298
                                                        lastout=textbegin;\
 
1299
                                                        if (change_text) {\
 
1300
                                                                CurrentByteOffset += textbegin - text;\
 
1301
                                                                text = textbegin;\
 
1302
                                                        }\
 
1303
                                                }\
 
1304
                                        }\
 
1305
                                        else if (change_text) {\
 
1306
                                                CurrentByteOffset += textbegin - text;\
 
1307
                                                text = textbegin;\
 
1308
                                        }\
 
1309
                                        if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\
 
1310
                                            ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;   /* done */\
 
1311
 
 
1312
                                        DO_OUTPUT(1)
 
1313
                                }
 
1314
 
 
1315
                        skip_output:
 
1316
                                if (MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break;    /* else look for more possible matches since we never know how many will match */
 
1317
                                if (DOWITHMASK && (text >= curtextend - 1)) {
 
1318
                                        DOWITHMASK = 0;
 
1319
                                        if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1320
                                                DO_OUTPUT(0)
 
1321
                                        }
 
1322
                                        if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1323
                                }
 
1324
                        }
 
1325
                        /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */
 
1326
                        if (DOWITHMASK && (text >= curtextend - 1)) {
 
1327
                                DOWITHMASK = 0;
 
1328
                                if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1329
                                        DO_OUTPUT(0)
 
1330
                                }
 
1331
                                if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1332
                        }
 
1333
                        if(!MATCHED) shift = 1; /* || MULTI_OUTPUT is implicit */
 
1334
                        else {
 
1335
                                MATCHED = 0;
 
1336
                                shift = m1 - 1 > 0 ? m1 - 1 : 1;
 
1337
                        }
 
1338
                }
 
1339
 
 
1340
                /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */
 
1341
                if (DOWITHMASK && (text >= curtextend - 1)) {
 
1342
                        DOWITHMASK = 0;
 
1343
                        if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1344
                                DO_OUTPUT(0)
 
1345
                        }
 
1346
                        if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1347
                }
 
1348
 
 
1349
                text += shift;
 
1350
                CurrentByteOffset += shift;
 
1351
        }
 
1352
 
 
1353
        /* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */
 
1354
 
 
1355
        if (DOWITHMASK && (text >= curtextend - 1)) {
 
1356
                DOWITHMASK = 0;
 
1357
                if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1358
                        DO_OUTPUT(0)
 
1359
                }
 
1360
                if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1361
        }
 
1362
 
 
1363
        if (INVERSE && !COUNT && (lastout <= textend)) {
 
1364
                if (!SILENT) {
 
1365
                if (agrep_finalfp != NULL)
 
1366
                        newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
 
1367
                else {
 
1368
                        if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
 
1369
                                if (newlen + agrep_outpointer >= agrep_outlen) {
 
1370
                                        OUTPUT_OVERFLOW;
 
1371
                                        return -1;
 
1372
                                }
 
1373
                                agrep_outpointer += newlen;
 
1374
                        }
 
1375
                }
 
1376
                }
 
1377
        }
 
1378
 
 
1379
        return 0;
 
1380
}
 
1381
#endif  /*DOTCOMPRESSED*/
 
1382
 
 
1383
/* shift is always 1: slight change in MATCHED semantics: it is set to 1 even if COUNT is set: previously, it wasn't set. Will it affect m_short? */
 
1384
int
 
1385
m_short(text, start, end)
 
1386
int start, end; register uchar *text;
 
1387
{
 
1388
        int m1=1;
 
1389
        int PRINTED = 0;
 
1390
        int pat_index;
 
1391
        unsigned char *oldtext;
 
1392
        register uchar *textend;
 
1393
        unsigned char *textbegin;
 
1394
        unsigned char *curtextend;
 
1395
        unsigned char *curtextbegin;
 
1396
        register int p, p_end;
 
1397
        int MATCHED=0;
 
1398
        /* int OUT=0; */
 
1399
        uchar *lastout;
 
1400
        uchar *qx;
 
1401
        uchar *px;
 
1402
        int j;
 
1403
        int DOWITHMASK;
 
1404
 
 
1405
        DOWITHMASK = 0;
 
1406
        if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1407
        textend = text + end;
 
1408
        lastout = text + start;
 
1409
        textbegin = text + start;
 
1410
        text = text + start - 1 ;
 
1411
/*
 
1412
        if (WORDBOUND || WHOLELINE) text = text-1;
 
1413
*/
 
1414
        if (WHOLELINE) text = text-1;
 
1415
                /* to accomodate the extra 2 W_delim */
 
1416
        while (++text <= textend) {
 
1417
                CurrentByteOffset ++;
 
1418
                p = HASH[tr[*text]];
 
1419
                p_end = HASH[tr[*text]+1];
 
1420
                while(p++ < p_end) {
 
1421
                        if (((pat_index = pat_indices[p]) <= 0) || (pat_len[pat_index] <= 0)) continue;
 
1422
#ifdef  debug
 
1423
                        printf("m_short(): p=%d pat_index=%d off=%d\n", p, pat_index, textend - text);
 
1424
#endif
 
1425
                        px = PatPtr[p];
 
1426
                        qx = text;
 
1427
                        while((*px!=0)&&(tr[*px] == tr[*qx])) {
 
1428
                                px++;
 
1429
                                qx++;
 
1430
                        }
 
1431
                        if (*px == 0) {
 
1432
                                if(text >= textend) return 0;
 
1433
                                if (WORDBOUND) {
 
1434
                                        if (isalnum(*(unsigned char *)qx)) goto skip_output;
 
1435
                                        if (isalnum(*(unsigned char *)(text-1))) goto skip_output;
 
1436
                                }
 
1437
                                if (!DOWITHMASK) {
 
1438
                                        /* Don't update CurrentByteOffset here: only before outputting properly */
 
1439
                                        if (!DELIMITER) {
 
1440
                                                curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
 
1441
                                                if (*curtextbegin == '\n') curtextbegin ++;
 
1442
                                                curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
 
1443
                                                if (*curtextend == '\n') curtextend ++;
 
1444
                                        }
 
1445
                                        else {
 
1446
                                                curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);
 
1447
                                                curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, D_pattern, D_length, OUTTAIL);
 
1448
                                        }
 
1449
                                        if (!OUTTAIL || INVERSE) textbegin = curtextend;
 
1450
                                        else if (DELIMITER) textbegin = curtextend - D_length;
 
1451
                                        else textbegin = curtextend - 1;
 
1452
                                }
 
1453
                                /* else prev curtextbegin is OK: if full AND isn't found, DOWITHMASK is 0-ed so that we search at most 1 line below */
 
1454
                                DOWITHMASK = 1;
 
1455
 
 
1456
                                if (pat_index <= anum_terminals) {
 
1457
                                        int     iii;
 
1458
                                        amatched_terminals[pat_index - 1] = 1;
 
1459
                                        for (iii=0; iii<anum_terminals; iii++)
 
1460
                                                if (aduplicates[pat_index - 1][iii])
 
1461
                                                        amatched_terminals[iii] = 1;
 
1462
                                }       
 
1463
                                if (AComplexBoolean) {
 
1464
                                        /* Can output only after all the matches in the current record have been identified: just like filter_output */
 
1465
                                        oldtext = text;
 
1466
                                        CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);
 
1467
                                        text = oldtext + pat_len[pat_index] - 1;
 
1468
                                        MATCHED = 0;
 
1469
                                        goto skip_output;
 
1470
                                }
 
1471
                                else if ((long)AParse & AND_EXP) {
 
1472
                                        for (j=0; j<anum_terminals; j++) if (!amatched_terminals[j]) break;
 
1473
                                        if (j<anum_terminals) goto skip_output;
 
1474
                                }
 
1475
 
 
1476
                                MATCHED = 1;
 
1477
                                oldtext = text; /* used only if MULTI_OUTPUT */
 
1478
 
 
1479
#undef  DO_OUTPUT
 
1480
#define DO_OUTPUT(change_text)\
 
1481
                                num_of_matched++;\
 
1482
                                if(FILENAMEONLY || SILENT)  return 0;\
 
1483
                                if (!COUNT) {\
 
1484
                                        if ((PRINTED = print_options(pat_index, text, curtextbegin, curtextend)) == -1) return -1;\
 
1485
                                        if(!INVERSE) {\
 
1486
                                                if (PRINTRECORD) {\
 
1487
                                                if (agrep_finalfp != NULL) {\
 
1488
                                                        fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);\
 
1489
                                                }\
 
1490
                                                else {\
 
1491
                                                        if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {\
 
1492
                                                                OUTPUT_OVERFLOW;\
 
1493
                                                                return -1;\
 
1494
                                                        }\
 
1495
                                                        else {\
 
1496
                                                                memcpy(agrep_outbuffer + agrep_outpointer, curtextbegin, curtextend-curtextbegin);\
 
1497
                                                                agrep_outpointer += curtextend - curtextbegin;\
 
1498
                                                        }\
 
1499
                                                }\
 
1500
                                                }\
 
1501
                                                else if (PRINTED) {\
 
1502
                                                        if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\
 
1503
                                                        else agrep_outbuffer[agrep_outpointer ++] = '\n';\
 
1504
                                                        PRINTED = 0;\
 
1505
                                                }\
 
1506
                                                if ((change_text) && MULTI_OUTPUT) {     /* next match starting from end of current */\
 
1507
                                                        CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);\
 
1508
                                                        text = oldtext + pat_len[pat_index] - 1;\
 
1509
                                                        MATCHED = 0;\
 
1510
                                                }\
 
1511
                                                else if (change_text) {\
 
1512
                                                        CurrentByteOffset += textbegin - text;\
 
1513
                                                        text = textbegin;\
 
1514
                                                }\
 
1515
                                        }\
 
1516
                                        else {\
 
1517
                                                /* if(lastout < curtextbegin) OUT=1; */\
 
1518
                                                if (!SILENT) {\
 
1519
                                                if (agrep_finalfp != NULL)\
 
1520
                                                        fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);\
 
1521
                                                else {\
 
1522
                                                        if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {\
 
1523
                                                                OUTPUT_OVERFLOW;\
 
1524
                                                                return -1;\
 
1525
                                                        }\
 
1526
                                                        memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);\
 
1527
                                                        agrep_outpointer += (curtextbegin-lastout);\
 
1528
                                                }\
 
1529
                                                }\
 
1530
                                                lastout=textbegin;\
 
1531
                                                if (change_text) {\
 
1532
                                                        CurrentByteOffset += textbegin - text;\
 
1533
                                                        text = textbegin;\
 
1534
                                                }\
 
1535
                                        }\
 
1536
                                }\
 
1537
                                else if (change_text) {\
 
1538
                                        CurrentByteOffset += textbegin - text;\
 
1539
                                        text = textbegin;\
 
1540
                                }\
 
1541
                                if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\
 
1542
                                    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;   /* done */\
 
1543
 
 
1544
                                DO_OUTPUT(1)
 
1545
                        }
 
1546
 
 
1547
                skip_output:
 
1548
                        if(MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break;     /* else look for more possible matches */
 
1549
                        if (DOWITHMASK && (text >= curtextend - 1)) {
 
1550
                                DOWITHMASK = 0;
 
1551
                                if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1552
                                        DO_OUTPUT(0)
 
1553
                                }
 
1554
                                if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1555
                        }
 
1556
                }
 
1557
                /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */
 
1558
                if (DOWITHMASK && (text >= curtextend - 1)) {
 
1559
                        DOWITHMASK = 0;
 
1560
                        if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1561
                                DO_OUTPUT(0)
 
1562
                        }
 
1563
                        if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1564
                }
 
1565
                if (MATCHED) text --;
 
1566
                MATCHED = 0;
 
1567
        } /* while */
 
1568
        CurrentByteOffset ++;
 
1569
 
 
1570
        /* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */
 
1571
 
 
1572
        if (DOWITHMASK && (text >= curtextend - 1)) {
 
1573
                DOWITHMASK = 0;
 
1574
                if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1575
                        DO_OUTPUT(0)
 
1576
                }
 
1577
                if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1578
        }
 
1579
 
 
1580
        if(INVERSE && !COUNT && (lastout <= textend)) {
 
1581
                if (!SILENT) {
 
1582
                if (agrep_finalfp != NULL) {
 
1583
                        while(lastout <= textend) fputc(*lastout++, agrep_finalfp);
 
1584
                }
 
1585
                else {
 
1586
                        if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {
 
1587
                                OUTPUT_OVERFLOW;
 
1588
                                return -1;
 
1589
                        }
 
1590
                        memcpy(agrep_outbuffer+agrep_outpointer, lastout, text-lastout+1);
 
1591
                        agrep_outpointer += (text-lastout+1);
 
1592
                        lastout = textend;
 
1593
                }
 
1594
                }
 
1595
        }
 
1596
 
 
1597
        return 0;
 
1598
}
 
1599
 
 
1600
#if     DOTCOMPRESSED
 
1601
/* shift is always 1: slight change in MATCHED semantics: it is set to 1 even if COUNT is set: previously, it wasn't set. Will it affect m_short? */
 
1602
int
 
1603
tc_m_short(text, start, end)
 
1604
int start, end; register uchar *text;
 
1605
{
 
1606
        int m1=1;
 
1607
        int PRINTED = 0;
 
1608
        int pat_index;
 
1609
        unsigned char *oldtext;
 
1610
        register uchar *textend;
 
1611
        unsigned char *textbegin;
 
1612
        unsigned char *curtextend;
 
1613
        unsigned char *curtextbegin;
 
1614
        register int p, p_end;
 
1615
        int MATCHED=0;
 
1616
        /* int OUT=0; */
 
1617
        uchar *lastout;
 
1618
        uchar *qx;
 
1619
        uchar *px;
 
1620
        int j;
 
1621
        int DOWITHMASK;
 
1622
        struct timeval initt, finalt;
 
1623
        int newlen;
 
1624
 
 
1625
        DOWITHMASK = 0;
 
1626
        if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1627
        textend = text + end;
 
1628
        lastout = text + start;
 
1629
        text = text + start - 1 ;
 
1630
        textbegin = text + start;
 
1631
        /* WORDBOUND adjustment not required */
 
1632
        while (++text <= textend) {
 
1633
                CurrentByteOffset ++;
 
1634
                p = tc_HASH[tc_tr[*text]];
 
1635
                p_end = tc_HASH[tc_tr[*text]+1];
 
1636
                while(p++ < p_end) {
 
1637
                        if (((pat_index = tc_pat_indices[p]) <= 0) || (tc_pat_len[pat_index] <= 0)) continue;
 
1638
#ifdef  debug
 
1639
                        printf("m_short(): p=%d pat_index=%d off=%d\n", p, pat_index, textend - text);
 
1640
#endif
 
1641
                        px = tc_PatPtr[p];
 
1642
                        qx = text;
 
1643
                        while((*px!=0)&&(tc_tr[*px] == tc_tr[*qx])) {
 
1644
                                px++;
 
1645
                                qx++;
 
1646
                        }
 
1647
                        if (*px == 0) {
 
1648
                                if(text >= textend) return 0;
 
1649
 
 
1650
                                if (!DOWITHMASK) {
 
1651
                                        /* Don't update CurrentByteOffset here: only before outputting properly */
 
1652
                                        if (!DELIMITER) {
 
1653
                                                curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
 
1654
                                                if (*curtextbegin == '\n') curtextbegin ++;
 
1655
                                                curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
 
1656
                                                if (*curtextend == '\n') curtextend ++;
 
1657
                                        }
 
1658
                                        else {
 
1659
                                                curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);
 
1660
                                                curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, tc_D_pattern, tc_D_length, OUTTAIL);
 
1661
                                        }
 
1662
                                }
 
1663
                                /* else prev curtextbegin is OK: if full AND isn't found, DOWITHMASK is 0-ed so that we search at most 1 line below */
 
1664
#if     MEASURE_TIMES
 
1665
                                gettimeofday(&initt, NULL);
 
1666
#endif  /*MEASURE_TIMES*/
 
1667
                                /* Was it really a match in the compressed line from prev line in text to text + strlen(tc_pat_len[pat_index]? */
 
1668
                                if (-1 == exists_tcompressed_word(tc_PatPtr[p], tc_pat_len[pat_index], curtextbegin, text - curtextbegin + tc_pat_len[pat_index], EASYSEARCH))
 
1669
                                        goto skip_output;
 
1670
#if     MEASURE_TIMES
 
1671
                                gettimeofday(&finalt, NULL);
 
1672
                                FILTERALGO_ms +=  (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
 
1673
#endif  /*MEASURE_TIMES*/
 
1674
 
 
1675
                                if (!DOWITHMASK) {
 
1676
                                        if (!OUTTAIL || INVERSE) textbegin = curtextend;
 
1677
                                        else if (DELIMITER) textbegin = curtextend - D_length;
 
1678
                                        else textbegin = curtextend - 1;
 
1679
                                }
 
1680
                                DOWITHMASK = 1;
 
1681
                                if (pat_index <= anum_terminals) {
 
1682
                                        int     iii;
 
1683
                                        amatched_terminals[pat_index - 1] = 1;
 
1684
                                        for (iii=0; iii<anum_terminals; iii++)
 
1685
                                                if (aduplicates[pat_index - 1][iii])
 
1686
                                                        amatched_terminals[iii] = 1;
 
1687
                                }
 
1688
                                if (AComplexBoolean) {
 
1689
                                        /* Can output only after all the matches in the current record have been identified: just like filter_output */
 
1690
                                        oldtext = text;
 
1691
                                        CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);
 
1692
                                        text = oldtext + pat_len[pat_index] - 1;
 
1693
                                        MATCHED = 0;
 
1694
                                        goto skip_output;
 
1695
                                }
 
1696
                                else if ((long)AParse & AND_EXP) {
 
1697
                                        for (j=0; j<anum_terminals; j++) if (!amatched_terminals[j]) break;
 
1698
                                        if (j<anum_terminals) goto skip_output;
 
1699
                                }
 
1700
 
 
1701
                                MATCHED = 1;
 
1702
                                oldtext = text; /* used only if MULTI_OUTPUT */
 
1703
 
 
1704
#undef  DO_OUTPUT
 
1705
#define DO_OUTPUT(change_text)\
 
1706
                                num_of_matched++;\
 
1707
                                if(FILENAMEONLY || SILENT)  return 0;\
 
1708
                                if (!COUNT) {\
 
1709
                                        if ((PRINTED = print_options(pat_index, text, curtextbegin, curtextend)) == -1) return -1;\
 
1710
                                        if(!INVERSE) {\
 
1711
                                                if (PRINTRECORD) {\
 
1712
/* #if     MEASURE_TIMES\
 
1713
                                                gettimeofday(&initt, NULL);\
 
1714
*/ /*#endif  MEASURE_TIMES*/\
 
1715
                                                if (agrep_finalfp != NULL)\
 
1716
                                                        newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);\
 
1717
                                                else {\
 
1718
                                                        if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {\
 
1719
                                                                if (newlen + agrep_outpointer >= agrep_outlen) {\
 
1720
                                                                        OUTPUT_OVERFLOW;\
 
1721
                                                                        return -1;\
 
1722
                                                                }\
 
1723
                                                                agrep_outpointer += newlen;\
 
1724
                                                        }\
 
1725
                                                }\
 
1726
/*#if     MEASURE_TIMES\
 
1727
                                                gettimeofday(&finalt, NULL);\
 
1728
                                                OUTFILTER_ms +=  (finalt.tv_sec* 1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);\
 
1729
*/ /*#endif  MEASURE_TIMES*/\
 
1730
                                                }\
 
1731
                                                else if (PRINTED) {\
 
1732
                                                        if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\
 
1733
                                                        else agrep_outbuffer[agrep_outpointer ++] = '\n';\
 
1734
                                                        PRINTED = 0;\
 
1735
                                                }\
 
1736
                                                if ((change_text) && MULTI_OUTPUT) {     /* next match starting from end of current */\
 
1737
                                                        CurrentByteOffset += (oldtext + tc_pat_len[pat_index] - 1 - text);\
 
1738
                                                        text = oldtext + tc_pat_len[pat_index] - 1;\
 
1739
                                                        MATCHED = 0;\
 
1740
                                                }\
 
1741
                                                else if (change_text) {\
 
1742
                                                        CurrentByteOffset += textbegin - text;\
 
1743
                                                        text = textbegin;\
 
1744
                                                }\
 
1745
                                        }\
 
1746
                                        else {  /* INVERSE: Don't care about filtering time */\
 
1747
                                                /* if(lastout < curtextbegin) OUT=1; */\
 
1748
                                                if (!SILENT) {\
 
1749
                                                if (agrep_finalfp != NULL)\
 
1750
                                                        newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);\
 
1751
                                                else {\
 
1752
                                                        if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {\
 
1753
                                                                if (newlen + agrep_outpointer >= agrep_outlen) {\
 
1754
                                                                        OUTPUT_OVERFLOW;\
 
1755
                                                                        return -1;\
 
1756
                                                                }\
 
1757
                                                                agrep_outpointer += newlen;\
 
1758
                                                        }\
 
1759
                                                }\
 
1760
                                                }\
 
1761
                                                lastout=textbegin;\
 
1762
                                                if (change_text) {\
 
1763
                                                        CurrentByteOffset += textbegin - text;\
 
1764
                                                        text = textbegin;\
 
1765
                                                }\
 
1766
                                        }\
 
1767
                                }\
 
1768
                                else if (change_text) {\
 
1769
                                        CurrentByteOffset += textbegin - text;\
 
1770
                                        text = textbegin;\
 
1771
                                }\
 
1772
                                if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\
 
1773
                                    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;   /* done */\
 
1774
 
 
1775
                                DO_OUTPUT(1)
 
1776
                        }
 
1777
 
 
1778
                skip_output:
 
1779
                        if(MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break;     /* else look for more possible matches */
 
1780
                        if (DOWITHMASK && (text >= curtextend - 1)) {
 
1781
                                DOWITHMASK = 0;
 
1782
                                if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1783
                                        DO_OUTPUT(0)
 
1784
                                }
 
1785
                                if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1786
                        }
 
1787
                }
 
1788
                /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */
 
1789
                if (DOWITHMASK && (text >= curtextend - 1)) {
 
1790
                        DOWITHMASK = 0;
 
1791
                        if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1792
                                DO_OUTPUT(0)
 
1793
                        }
 
1794
                        if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1795
                }
 
1796
                if (MATCHED) text--;
 
1797
                MATCHED = 0;
 
1798
        } /* while */
 
1799
        CurrentByteOffset ++;
 
1800
 
 
1801
        /* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */
 
1802
 
 
1803
        if (DOWITHMASK && (text >= curtextend - 1)) {
 
1804
                DOWITHMASK = 0;
 
1805
                if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
 
1806
                        DO_OUTPUT(0)
 
1807
                }
 
1808
                if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
 
1809
        }
 
1810
 
 
1811
        if (INVERSE && !COUNT && (lastout <= textend)) {
 
1812
                if (!SILENT) {
 
1813
                if (agrep_finalfp != NULL)
 
1814
                        newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
 
1815
                else {
 
1816
                        if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
 
1817
                                if (newlen + agrep_outpointer >= agrep_outlen) {
 
1818
                                        OUTPUT_OVERFLOW;
 
1819
                                        return -1;
 
1820
                                }
 
1821
                                agrep_outpointer += newlen;
 
1822
                        }
 
1823
                }
 
1824
                }
 
1825
        }
 
1826
 
 
1827
        return 0;
 
1828
}
 
1829
#endif  /*DOTCOMPRESSED*/
 
1830
 
 
1831
static void
 
1832
f_prep(pat_index, Pattern)
 
1833
uchar *Pattern;   int pat_index;
 
1834
{
 
1835
int i, m;
 
1836
register unsigned hash=0;
 
1837
#ifdef debug
 
1838
        puts(Pattern);
 
1839
#endif
 
1840
        m = p_size;
 
1841
                for (i=m-1; i>=(1+LONG); i--) {
 
1842
                                hash = (tr1[Pattern[i]]);
 
1843
                                hash = (hash << Hbits) + (tr1[Pattern[i-1]]);
 
1844
                if(LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]] );
 
1845
                if(SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i;
 
1846
        }
 
1847
        i=m-1;
 
1848
                hash = (tr1[Pattern[i]]);
 
1849
                hash = (hash << Hbits) + (tr1[Pattern[i-1]]);
 
1850
        if(LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]] );
 
1851
                if(SHORT) hash=tr[Pattern[0]];
 
1852
#ifdef debug
 
1853
        printf("hash = %d\n", hash);
 
1854
#endif
 
1855
                HASH[hash]++;
 
1856
                return;
 
1857
}
 
1858
 
 
1859
#if     DOTCOMPRESSED
 
1860
static void
 
1861
tc_f_prep(pat_index, Pattern)
 
1862
uchar *Pattern;   int pat_index;
 
1863
{
 
1864
int i, m;
 
1865
register unsigned hash=0;
 
1866
#ifdef debug
 
1867
        puts(Pattern);
 
1868
#endif
 
1869
        m = tc_p_size;
 
1870
                for (i=m-1; i>=(1+tc_LONG); i--) {
 
1871
                                hash = (tc_tr1[Pattern[i]]);
 
1872
                                hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]);
 
1873
                if(tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]] );
 
1874
                if(tc_SHIFT1[hash] >= m-1-i) tc_SHIFT1[hash] = m-1-i;
 
1875
        }
 
1876
        i=m-1;
 
1877
                hash = (tc_tr1[Pattern[i]]);
 
1878
                hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]);
 
1879
        if(tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]] );
 
1880
                if(tc_SHORT) hash=tc_tr[Pattern[0]];
 
1881
#ifdef debug
 
1882
        printf("hash = %d\n", hash);
 
1883
#endif
 
1884
                tc_HASH[hash]++;
 
1885
                return;
 
1886
}
 
1887
#endif  /*DOTCOMPRESSED*/
 
1888
 
 
1889
static void
 
1890
f_prep1(pat_index, Pattern)
 
1891
uchar *Pattern;   int pat_index;
 
1892
{
 
1893
int i, m;
 
1894
int hash2;
 
1895
register unsigned hash;
 
1896
        m = p_size;
 
1897
#ifdef debug
 
1898
        puts(Pattern);
 
1899
#endif
 
1900
                for (i=m-1; i>=(1+LONG); i--) {
 
1901
                                hash = (tr1[Pattern[i]]);
 
1902
                                hash = (hash << Hbits) + (tr1[Pattern[i-1]]);
 
1903
                if(LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]] );
 
1904
                if(SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i;
 
1905
        }
 
1906
        i=m-1;
 
1907
                hash = (tr1[Pattern[i]]);
 
1908
                hash = (hash << Hbits) + (tr1[Pattern[i-1]]);
 
1909
        if(LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]] );
 
1910
                if(SHORT) hash=tr[Pattern[0]];
 
1911
        hash2 = (tr[Pattern[0]] << 8) + tr[Pattern[1]];
 
1912
#ifdef debug
 
1913
        printf("hash = %d, HASH[hash] = %d\n", hash, HASH[hash]);
 
1914
#endif
 
1915
                PatPtr[HASH[hash]] = Pattern;
 
1916
                pat_indices[HASH[hash]] = pat_index;
 
1917
        Hash2[HASH[hash]] = hash2;
 
1918
                HASH[hash]--;
 
1919
                return;
 
1920
}
 
1921
 
 
1922
#if     DOTCOMPRESSED
 
1923
static void
 
1924
tc_f_prep1(pat_index, Pattern)
 
1925
uchar *Pattern;   int pat_index;
 
1926
{
 
1927
int i, m;
 
1928
int hash2;
 
1929
register unsigned hash;
 
1930
        m = tc_p_size;
 
1931
#ifdef debug
 
1932
        puts(Pattern);
 
1933
#endif
 
1934
                for (i=m-1; i>=(1+tc_LONG); i--) {
 
1935
                                hash = (tc_tr1[Pattern[i]]);
 
1936
                                hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]);
 
1937
                if(tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]] );
 
1938
                if(tc_SHIFT1[hash] >= m-1-i) tc_SHIFT1[hash] = m-1-i;
 
1939
        }
 
1940
        i=m-1;
 
1941
                hash = (tc_tr1[Pattern[i]]);
 
1942
                hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]);
 
1943
        if(tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]] );
 
1944
                if(tc_SHORT) hash=tc_tr[Pattern[0]];
 
1945
        hash2 = (tc_tr[Pattern[0]] << 8) + tc_tr[Pattern[1]];
 
1946
#ifdef debug
 
1947
        printf("hash = %d, tc_HASH[hash] = %d\n", hash, tc_HASH[hash]);
 
1948
#endif
 
1949
                tc_PatPtr[tc_HASH[hash]] = Pattern;
 
1950
                tc_pat_indices[tc_HASH[hash]] = pat_index;
 
1951
        tc_Hash2[tc_HASH[hash]] = hash2;
 
1952
                tc_HASH[hash]--;
 
1953
                return;
 
1954
}
 
1955
#endif  /*DOTCOMPRESSED*/
 
1956
 
 
1957
static void
 
1958
accumulate()
 
1959
{
 
1960
        int i;
 
1961
 
 
1962
        for(i=1; i<MAXHASH; i++)  {
 
1963
        /*
 
1964
        printf("%d, ", HASH[i]);
 
1965
        */
 
1966
        HASH[i] = HASH[i-1] + HASH[i];
 
1967
        }
 
1968
        HASH[0] = 0;
 
1969
        return;
 
1970
}
 
1971
 
 
1972
#if     DOTCOMPRESSED
 
1973
static void
 
1974
tc_accumulate()
 
1975
{
 
1976
        int i;
 
1977
 
 
1978
        for(i=1; i<MAXHASH; i++)  {
 
1979
        /*
 
1980
        printf("%d, ", HASH[i]);
 
1981
        */
 
1982
        tc_HASH[i] = tc_HASH[i-1] + tc_HASH[i];
 
1983
        }
 
1984
        tc_HASH[0] = 0;
 
1985
        return;
 
1986
}
 
1987
#endif  /*DOTCOMPRESSED*/
 
1988
 
 
1989
/* Compute duplicate strings using tr's info, not strcmp! */
 
1990
void
 
1991
acompute_duplicates(aduplicates, aterminals, anum_terminals, tr)
 
1992
        char    aduplicates[MAXNUM_PAT][MAXNUM_PAT];
 
1993
        ParseTree aterminals[];
 
1994
        int     anum_terminals;
 
1995
        char    tr[256];
 
1996
{
 
1997
        int     i, j, k, leni, lenj, initk;
 
1998
 
 
1999
        for (i=0; i<MAXNUM_PAT; i++) memset(aduplicates[i], '\0', MAXNUM_PAT);
 
2000
        for (i=0; i<anum_terminals; i++) {
 
2001
                leni = strlen(aterminals[i].data.leaf.value);
 
2002
                for (j=i; j<anum_terminals; j++) {
 
2003
                        if (i==j) {
 
2004
                                aduplicates[i][j] = 1;
 
2005
                                continue;
 
2006
                        }
 
2007
                        lenj = strlen(aterminals[j].data.leaf.value);
 
2008
                        if (lenj != leni) continue;
 
2009
                        for (k=0; k<lenj; k++) {
 
2010
                                if (tr[aterminals[i].data.leaf.value[k]] != tr[aterminals[j].data.leaf.value[k]]) break;
 
2011
                        }
 
2012
                        if (k < lenj) continue;
 
2013
                        aduplicates[i][j] = 1;
 
2014
                        aduplicates[j][i] = 1;
 
2015
                }
 
2016
        }
 
2017
}