68
CHARTYPE *pat; int fd, m, D;
70
CHARTYPE text[BLOCKSIZE+2*MAXLINE+MAXPATT]; /* input text stream */
71
int offset = 2*MAXLINE;
72
int buf_end, num_read, i, start, end, residue = 0;
73
if(pat[0] == '^' || pat[0] == '$') pat[0] = '\n';
74
if(pat[m-1] == '^' || pat[m-1] == '$') pat[m-1] = '\n';
75
char_tr(pat, &m); /* will change pat, and m if WHOLELINE is ON */
76
text[offset-1] = '\n'; /* initial case */
77
for(i=0; i < MAXLINE; i++) text[i] = 0; /* security zone */
79
if(WHOLELINE) start--;
81
fprintf(stderr, "%s: pattern too long\n", Progname);
85
if(m > LONG_EXAC) m_preprocess(pat);
88
else if (DNA) prep4(pat, m);
89
else if(m >= LONG_APPX) am_preprocess(pat);
92
initmask(pat, Mask, m, 0, &endposition);
94
for(i=1; i<=m; i++) text[BLOCKSIZE+offset+i] = pat[m-1];
95
/* to make sure the skip loop in bm() won't go out of bounds */
96
while( (num_read = read(fd, text+offset, BLOCKSIZE)) > 0)
98
buf_end = end = offset + num_read -1 ;
99
while(text[end] != '\n' && end > offset) end--;
100
residue = buf_end - end + 1 ;
101
text[start-1] = '\n';
103
if(m > LONG_EXAC) monkey(pat, m, text+start, text+end);
104
else bm(pat, m, text+start, text+end);
107
if(DNA) monkey4( pat, m, text+start, text+end, D );
109
if(m >= LONG_APPX) a_monkey(pat, m, text+start, text+end, D);
110
else agrep(pat, m, text+start, text+end, D);
113
if(FILENAMEONLY && num_of_matched) {
114
printf("%s\n", CurrentFileName);
116
start = offset - residue ;
117
if(start < MAXLINE) {
120
strncpy(text+start, text+end, residue);
122
} /* end of while(num_read = ... */
172
sgrep(in_pat, in_m, fd, D, samepattern)
176
CHARTYPE patbuf[MAXLINE];
177
CHARTYPE *pat = patbuf;
179
CHARTYPE *text; /* input text stream */
180
int offset = 2*Max_record;
181
int buf_end, num_read, i, start, end, residue = 0;
183
CHARTYPE *oldpat = pat;
185
static CHARTYPE newpat[MAXLINE]; /* holds compressed version */
188
static struct timeval initt, finalt;
191
int oldCurrentByteOffset;
193
strncpy(pat, in_pat, MAXLINE);
194
pat[MAXLINE-1] = '\0';
196
#define PROCESS_PATTERN \
198
if( (pat[0] == '^') || (pat[0] == '$') ) pat[0] = '\n';\
199
if ((m>1) && (pat[m-2] != '\\') && ((pat[m-1] == '^') || (pat[m-1] == '$'))) pat[m-1] = '\n';\
201
/* whether constant or not, interpret the escape character */\
202
for (k=0; k<m; k++) {\
203
if (pat[k] == '\\') {\
205
pat[j] = pat[j+1]; /* including '\0' */\
209
char_tr(pat, &m); /* will change pat, and m if WHOLELINE is ON */\
211
fprintf(stderr, "%s: pattern too long (has > %d chars)\n", Progname, MAXPATT);\
213
errno = AGREP_ERROR;\
219
if(m > LONG_EXAC) m_preprocess(pat);\
220
else prep_bm(pat, m);\
222
else if (DNA) prep4(pat, m);\
223
else if(m >= LONG_APPX) am_preprocess(pat);\
226
initmask(pat, Mask, m, 0, &endposition);\
231
#endif /*AGREP_POINTER*/
232
alloc_buf(fd, &text, 2*BlockSize+2*Max_record+MAXPATT);
233
text[offset-1] = '\n'; /* initial case */
234
for(i=0; i < Max_record; i++) text[i] = 0; /* security zone */
238
CurrentByteOffset --;
241
while( (num_read = fill_buf(fd, text+offset, 2*BlockSize)) > 0)
243
buf_end = end = offset + num_read -1 ;
244
oldCurrentByteOffset = CurrentByteOffset;
247
if ((TCOMPRESSED == ON) && tuncompressible(text+offset, num_read)) {
248
EASYSEARCH = text[offset+SIGNATURE_LEN-1];
249
start += SIGNATURE_LEN;
250
CurrentByteOffset += SIGNATURE_LEN;
252
fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);
255
gettimeofday(&initt, NULL);
256
#endif /*MEASURE_TIMES*/
257
if (samepattern || ((newm = quick_tcompress(FREQ_FILE, HASH_FILE, pat, m, newpat, Max_record-8, EASYSEARCH)) > 0)) {
264
gettimeofday(&finalt, NULL);
265
INFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
266
#endif /*MEASURE_TIMES*/
268
else TCOMPRESSED = OFF;
270
PROCESS_PATTERN /* must be AFTER we know that it is a compressed pattern... */
272
for(i=1; i<=m; i++) text[2*BlockSize+offset+i] = pat[m-1];
273
/* to make sure the skip loop in bm() won't go out of bounds in later iterations */
278
while ((text[end] != '\n') && (end > offset)) end--;
279
text[start-1] = '\n';
282
unsigned char *newbuf = text + end + 1;
283
newbuf = backward_delimiter(newbuf, text+offset, D_pattern, D_length, OUTTAIL); /* see agrep.c/'d' */
284
if (newbuf < text+offset+D_length) newbuf = text + end + 1;
285
end = newbuf - text - 1;
286
memcpy(text+start-D_length, D_pattern, D_length);
288
residue = buf_end - end + 1 ;
291
/* No harm in sending a few extra parameters even if they are unused: they are not accessed in monkey*()s */
294
if (-1 == monkey(pat, m, text+start, text+end, oldpat, oldm)) {
300
if (-1 == bm(pat, m, text+start, text+end, oldpat, oldm)) {
308
if (-1 == monkey4( pat, m, text+start, text+end, D , oldpat, oldm )) {
315
if (-1 == a_monkey(pat, m, text+start, text+end, D, oldpat, oldm)) {
321
if (-1 == agrep(pat, m, text+start, text+end, D, oldpat, oldm)) {
328
if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
329
if (agrep_finalfp != NULL)
330
fprintf(agrep_finalfp, "%s", CurrentFileName);
333
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
334
(CurrentFileName[outindex] != '\0'); outindex++) {
335
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
337
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
342
agrep_outpointer += outindex;
345
char *s = aprint_file_time(CurrentFileTime);
346
if (agrep_finalfp != NULL)
347
fprintf(agrep_finalfp, "%s", s);
350
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
351
(s[outindex] != '\0'); outindex++) {
352
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
354
if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
359
agrep_outpointer += outindex;
362
if (agrep_finalfp != NULL)
363
fprintf(agrep_finalfp, "\n");
365
if (agrep_outpointer+1>=agrep_outlen) {
370
else agrep_outbuffer[agrep_outpointer++] = '\n';
378
CurrentByteOffset = oldCurrentByteOffset + end - start + 1; /* for a new iteration: avoid complicated calculations below */
379
start = offset - residue ;
380
if(start < Max_record) {
383
/* strncpy(text+start, text+end, residue); */
384
memcpy(text+start, text+end, residue);
386
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
387
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
391
} /* end of while(num_read = ...) */
393
text[start-1] = '\n';
394
text[start+residue] = '\n';
397
if (start > D_length) memcpy(text+start-D_length, D_pattern, D_length);
398
memcpy(text+start+residue, D_pattern, D_length);
400
end = start + residue - 2;
403
/* No harm in sending a few extra parameters even if they are unused: they are not accessed in monkey*()s */
406
if (-1 == monkey(pat, m, text+start, text+end, oldpat, oldm)) {
412
if (-1 == bm(pat, m, text+start, text+end, oldpat, oldm)) {
420
if (-1 == monkey4( pat, m, text+start, text+end, D , oldpat, oldm )) {
427
if (-1 == a_monkey(pat, m, text+start, text+end, D, oldpat, oldm)) {
433
if (-1 == agrep(pat, m, text+start, text+end, D, oldpat, oldm)) {
440
if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
441
if (agrep_finalfp != NULL)
442
fprintf(agrep_finalfp, "%s", CurrentFileName);
445
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
446
(CurrentFileName[outindex] != '\0'); outindex++) {
447
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
449
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
454
agrep_outpointer += outindex;
457
char *s = aprint_file_time(CurrentFileTime);
458
if (agrep_finalfp != NULL)
459
fprintf(agrep_finalfp, "%s", s);
462
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
463
(s[outindex] != '\0'); outindex++) {
464
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
466
if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
471
agrep_outpointer += outindex;
474
if (agrep_finalfp != NULL)
475
fprintf(agrep_finalfp, "\n");
477
if (agrep_outpointer+1>=agrep_outlen) {
482
else agrep_outbuffer[agrep_outpointer++] = '\n';
494
else { /* as if only one iteration of the while-loop and offset = 0 */
495
tempbuf = (CHARTYPE*)malloc(m);
496
text = (CHARTYPE *)agrep_inbuffer;
497
num_read = agrep_inlen;
499
buf_end = end = num_read - 1;
503
CurrentByteOffset --;
506
if ((TCOMPRESSED == ON) && tuncompressible(text+1, num_read)) {
507
EASYSEARCH = text[offset+SIGNATURE_LEN-1];
508
start += SIGNATURE_LEN;
509
CurrentByteOffset += SIGNATURE_LEN;
511
fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);
514
gettimeofday(&initt, NULL);
515
#endif /*MEASURE_TIMES*/
516
if (samepattern || ((newm = quick_tcompress(FREQ_FILE, HASH_FILE, pat, m, newpat, Max_record-8, EASYSEARCH)) > 0)) {
523
gettimeofday(&finalt, NULL);
524
INFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
525
#endif /*MEASURE_TIMES*/
527
else TCOMPRESSED = OFF;
529
PROCESS_PATTERN /* must be after we know whether it is compressed or not */
531
memcpy(tempbuf, text+end+1, m); /* save portion being overwritten */
532
for(i=1; i<=m; i++) text[end+i] = pat[m-1];
533
/* to make sure the skip loop in bm() won't go out of bounds in later iterations */
536
while(text[end] != '\n' && end > 1) end--;
538
unsigned char *newbuf = text + end + 1;
539
newbuf = backward_delimiter(newbuf, text, D_pattern, D_length, OUTTAIL); /* see agrep.c/'d' */
540
if (newbuf < text+offset+D_length) newbuf = text + end + 1;
541
end = newbuf - text - 1;
543
/* text[0] = text[end] = r_newline; : the user must ensure that the delimiter is there at text[0] and occurs somewhere before text[end ] */
545
/* An exact copy of the above SGREP_PROCESS */
546
/* No harm in sending a few extra parameters even if they are unused: they are not accessed in monkey*()s */
549
if (-1 == monkey(pat, m, text+start, text+end, oldpat, oldm)) {
551
memcpy(text+end+1, tempbuf, m); /* restore */
557
if (-1 == bm(pat, m, text+start, text+end, oldpat, oldm)) {
559
memcpy(text+end+1, tempbuf, m); /* restore */
567
if (-1 == monkey4( pat, m, text+start, text+end, D , oldpat, oldm )) {
569
memcpy(text+end+1, tempbuf, m); /* restore */
576
if (-1 == a_monkey(pat, m, text+start, text+end, D, oldpat, oldm)) {
578
memcpy(text+end+1, tempbuf, m); /* restore */
584
if (-1 == agrep(pat, m, text+start, text+end, D, oldpat, oldm)) {
586
memcpy(text+end+1, tempbuf, m); /* restore */
593
if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) { /* externally set */
594
if (agrep_finalfp != NULL)
595
fprintf(agrep_finalfp, "%s", CurrentFileName);
598
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
599
(CurrentFileName[outindex] != '\0'); outindex++) {
600
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
602
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
607
agrep_outpointer += outindex;
610
char *s = aprint_file_time(CurrentFileTime);
611
if (agrep_finalfp != NULL)
612
fprintf(agrep_finalfp, "%s", s);
615
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
616
(s[outindex] != '\0'); outindex++) {
617
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
619
if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
624
agrep_outpointer += outindex;
627
if (agrep_finalfp != NULL)
628
fprintf(agrep_finalfp, "\n");
630
if (agrep_outpointer+1>=agrep_outlen) {
635
else agrep_outbuffer[agrep_outpointer++] = '\n';
642
memcpy(text+end+1, tempbuf, m); /* restore */
646
#endif /*AGREP_POINTER*/
124
647
} /* end sgrep */
126
649
/* SUN: bm assumes that the content of text[n]...text[n+m-1] is
127
650
pat[m-1] such that the skip loop is guaranteed to terminate */
129
bm(pat, m, text, textend)
130
CHARTYPE *text, *textend, *pat; int m;
653
bm(pat, m, text, textend, oldpat, oldm)
654
CHARTYPE *text, *textend, *pat, *oldpat;
133
register int m1, j, d1;
136
printf("%d\t", textend - text);
137
printf("%c, %c", *text, *textend);
140
d1 = shift_1; /* at least 1 */
143
while (text <= textend) {
144
shift = SHIFT[*(text += shift)];
146
shift = SHIFT[*(text += shift)];
147
shift = SHIFT[*(text += shift)];
148
shift = SHIFT[*(text += shift)];
659
register int m1, j, d1;
660
CHARTYPE *textbegin = text;
663
CHARTYPE *curtextbegin;
664
CHARTYPE *curtextend;
666
struct timeval initt, finalt;
668
CHARTYPE *lastout = text;
670
d1 = shift_1; /* at least 1 */
673
while (text <= textend) {
675
shift = SHIFT[*(text += shift)];
677
shift = SHIFT[*(text += shift)];
678
shift = SHIFT[*(text += shift)];
679
shift = SHIFT[*(text += shift)];
681
CurrentByteOffset += text - textstart;
151
683
while(TR[pat[m1 - j]] == TR[*(text - j)]) {
152
if(++j == m) break; /* if statement can be
153
saved, but for safety ... */
684
if(++j == m) break; /* if statement can be saved, but for safety ... */
156
if(text > textend) return;
687
if(text > textend) return 0;
158
if(TR[*(text+1)] != W_DELIM) goto CONT;
159
if(TR[*(text-m)] != W_DELIM) goto CONT;
689
/* if(isalnum(*(unsigned char *)(text+1))) goto CONT; --> fixed by SHIOZAKI Takehiko <takehi-s@ascii.co.jp> */
690
if((text+1 <= textend) && isalnum(*(unsigned char *)(text+1)) && isalnum(*(unsigned char *)text)) {
691
shift = 1; /* bg 4/27/97 */
692
goto WCONT; /* as if there was no match */
694
/* if(isalnum(*(unsigned char *)(text-m))) goto CONT; --> fixed by SHIOZAKI Takehiko <takehi-s@ascii.co.jp> */
695
if((textbegin <= (text-m)) && isalnum(*(unsigned char *)(text-m)) && isalnum(*(unsigned char *)(text-m+1))) {
696
shift = 1; /* bg 4/27/97 */
697
goto WCONT; /* as if there was no match */
699
/* changed by Udi 11/7/94 to avoid having to set TR[] to W_delim */
702
if (TCOMPRESSED == ON) {
703
/* Don't update CurrentByteOffset here: only before outputting properly */
705
curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
706
if (*curtextbegin == '\n') curtextbegin ++;
707
curtextend = curtextbegin; /*text-m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
708
if (*curtextend == '\n') curtextend ++;
711
curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);
713
curtextend = forward_delimiter(curtextbegin+D_length/*text-m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL);
715
curtextend = forward_delimiter(curtextbegin/*text-m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL);
720
/* Don't update CurrentByteOffset here: only before outputting properly */
722
curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
723
if (*curtextbegin == '\n') curtextbegin ++;
724
curtextend = curtextbegin /*text-m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
725
if (*curtextend == '\n') curtextend ++;
728
curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);
730
curtextend = forward_delimiter(curtextbegin+D_length/*text-m*/, textend, D_pattern, D_length, OUTTAIL);
732
curtextend = forward_delimiter(curtextbegin/*text-m*/, textend, D_pattern, D_length, OUTTAIL);
737
if (TCOMPRESSED == ON) {
739
gettimeofday(&initt, NULL);
740
#endif /*MEASURE_TIMES*/
741
if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text - curtextbegin + m, EASYSEARCH))
742
goto CONT; /* as if there was no match */
744
gettimeofday(&finalt, NULL);
745
FILTERALGO_ms += (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
746
#endif /*MEASURE_TIMES*/
749
textbegin = curtextend; /* (curtextend - 1 > textbegin ? curtextend - 1 : curtextend); */
161
750
num_of_matched++;
162
if(FILENAMEONLY) return;
164
if(FNAME) printf("%s: ", CurrentFileName);
165
while(*(--text) != '\n');
166
while(*(++text) != '\n') putchar(*(text));
169
else { while(*text != '\n') text++; }
751
if(FILENAMEONLY) return 0;
754
if(FNAME && (NEW_FILE || !POST_FILTER)) {
755
char nextchar = (POST_FILTER == ON)?'\n':' ';
756
char *prevstring = (POST_FILTER == ON)?"\n":"";
758
if (agrep_finalfp != NULL)
759
fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName);
762
if (prevstring[0] != '\0') {
763
if(agrep_outpointer + 1 >= agrep_outlen) {
767
else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
769
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
770
(CurrentFileName[outindex] != '\0'); outindex++) {
771
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
773
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
777
agrep_outpointer += outindex;
780
char *s = aprint_file_time(CurrentFileTime);
781
if (agrep_finalfp != NULL)
782
fprintf(agrep_finalfp, "%s", s);
785
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
786
(s[outindex] != '\0'); outindex++) {
787
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
789
if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
793
agrep_outpointer += outindex;
796
if (agrep_finalfp != NULL)
797
fprintf(agrep_finalfp, ":%c", nextchar);
799
if (agrep_outpointer+2>= agrep_outlen) {
804
agrep_outbuffer[agrep_outpointer++] = ':';
805
agrep_outbuffer[agrep_outpointer++] = nextchar;
814
if (agrep_finalfp != NULL)
815
fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);
819
sprintf(s, "%d=", CurrentByteOffset);
820
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
821
(s[outindex] != '\0'); outindex++) {
822
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
824
if (s[outindex] != '\0') {
828
agrep_outpointer += outindex;
834
if (agrep_finalfp != NULL)
835
fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
839
sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
840
for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
841
(s[outindex] != '\0'); outindex ++) {
842
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
844
if (s[outindex] != '\0') {
848
agrep_outpointer += outindex;
853
CurrentByteOffset += textbegin - text;
858
if (TCOMPRESSED == ON) {
860
gettimeofday(&initt, NULL);
861
#endif /*MEASURE_TIMES*/
862
if (agrep_finalfp != NULL)
863
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);
865
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
866
if (agrep_outpointer + newlen + 1 >= agrep_outlen) {
870
agrep_outpointer += newlen;
874
gettimeofday(&finalt, NULL);
875
OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
876
#endif /*MEASURE_TIMES*/
879
if (agrep_finalfp != NULL) {
880
fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);
883
if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {
887
memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin);
888
agrep_outpointer += curtextend - curtextbegin;
893
if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);
894
else agrep_outbuffer[agrep_outpointer ++] = '\n';
900
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
901
if (agrep_finalfp != NULL)
902
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);
904
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
905
if (newlen + agrep_outpointer >= agrep_outlen) {
909
agrep_outpointer += newlen;
913
CurrentByteOffset += textbegin - text;
916
else { /* NOT TCOMPRESSED */
917
if (agrep_finalfp != NULL)
918
fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);
920
if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {
924
memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);
925
agrep_outpointer += (curtextbegin - lastout);
928
CurrentByteOffset += textbegin - text;
935
CurrentByteOffset += textbegin - text;
938
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
939
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */
942
if (m == 1) shift = 0; else shift = 1; /* ZZZZZZZZZZZZZZZZ check it out later */
948
if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) {
949
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
950
if (agrep_finalfp != NULL)
951
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
953
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
954
if (newlen + agrep_outpointer >= agrep_outlen) {
958
agrep_outpointer += newlen;
962
else { /* NOT TCOMPRESSED */
963
if (agrep_finalfp != NULL)
964
fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp);
966
if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {
970
memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1);
971
agrep_outpointer += (textend - lastout + 1);
179
979
/* initmask() initializes the mask table for the pattern */
180
980
/* endposition is a mask for the endposition of the pattern */
181
981
/* endposition will contain k mask bits if the pattern contains k fragments */
182
983
initmask(pattern, Mask, m, D, endposition)
183
CHARTYPE *pattern; unsigned *Mask; register int m, D; unsigned *endposition;
987
unsigned *endposition;
185
register unsigned Bit1, c;
186
register int i, j, frag_num;
989
register unsigned Bit1, c;
990
register int i, j, frag_num;
188
Bit1 = 1 << 31; /* the first bit of Bit1 is 1, others 0. */
189
frag_num = D+1; *endposition = 0;
190
for (i = 0; i < frag_num; i++) *endposition = *endposition | (Bit1 >> i);
191
*endposition = *endposition >> (m - frag_num);
192
for(i = 0; i < m; i++)
193
if (pattern[i] == '^' || pattern[i] == '$') {
196
for(i = 0; i < MAXSYM; i++) Mask[i] = ~0;
197
for(i = 0; i < m; i++) /* initialize the mask table */
199
for ( j = 0; j < m; j++)
200
if( c == pattern[j] )
201
Mask[c] = Mask[c] & ~( Bit1 >> j ) ;
992
/* Bit1 = 1 << 31;*/ /* the first bit of Bit1 is 1, others 0. */
993
Bit1 = (unsigned)0x80000000;
996
for (i = 0; i < frag_num; i++) *endposition = *endposition | (Bit1 >> i);
997
*endposition = *endposition >> (m - frag_num);
998
for(i = 0; i < m; i++)
999
if (pattern[i] == '^' || pattern[i] == '$') {
1002
for(i = 0; i < MAXSYM; i++) Mask[i] = ~0;
1003
for(i = 0; i < m; i++) /* initialize the mask table */
1006
for ( j = 0; j < m; j++)
1007
if( c == pattern[j] )
1008
Mask[c] = Mask[c] & ~( Bit1 >> j ) ;
205
1013
prep(Pattern, M, D) /* preprocessing for partitioning_bm */
206
CHARTYPE *Pattern; /* can be fine-tuned to choose a better partition */
1014
CHARTYPE *Pattern; /* can be fine-tuned to choose a better partition */
209
register int i, j, k, p, shift;
211
unsigned hash, b_size = 3;
1017
register int i, j, k, p, shift;
1018
register unsigned m;
1019
unsigned hash, b_size = 3;
213
1021
p = M - m*(D+1);
214
1022
for (i = 0; i < MAXSYM; i++) SHIFT[i] = m;
215
1023
for (i = M-1; i>=p ; i--) {
216
1024
shift = (M-1-i)%m;
217
1025
hash = Pattern[i];
218
if(SHIFT[hash] > shift) SHIFT[hash] = shift;
1026
if((int)(SHIFT[hash]) > (int)(shift)) SHIFT[hash] = shift;
221
1029
for(i=0; i<M; i++) printf(" %d,", SHIFT[Pattern[i]]);
243
1051
hash = (hash << 2) + Pattern[j-k];
246
printf(" hash = %d,", hash);
1054
printf(" hash = %d,", hash);
248
1056
MEMBER[hash] = 1;
253
agrep( pat, M, text, textend, D )
254
int M, D ; register CHARTYPE *text, *textend, *pat;
1061
agrep( pat, M, text, textend, D, oldpat, oldM)
1063
register CHARTYPE *text, *textend, *pat, *oldpat;
257
register int m = M/(D+1);
258
register CHARTYPE *textstart;
259
register int shift, HASH;
262
int Candidate[MaxCan][2], round, lastend=0;
263
unsigned R1[MaxError+1], R2[MaxError+1];
264
register unsigned int r1, endpos, c;
1066
register int m = M/(D+1);
1067
register CHARTYPE *textbegin;
1068
CHARTYPE *textstart;
1069
register int shift, HASH;
1072
int Candidate[MaxCan][2], round, lastend=0;
1073
unsigned R1[MaxError+1], R2[MaxError+1];
1074
register unsigned int r1, endpos, c;
1075
unsigned currentpos;
1079
CHARTYPE *lastout = text;
269
Candidate[0][0] = Candidate[0][1] = 0;
276
while (text < textend) {
277
shift = SHIFT[*(text += shift)];
279
shift = SHIFT[*(text += shift)];
280
shift = SHIFT[*(text += shift)];
283
while(j < r1) { HASH = (HASH << 2) + *(text-j);
286
i = text - textstart;
287
if((i - M - D - 10) > Candidate[cdx][1]) {
1082
Candidate[0][0] = Candidate[0][1] = 0;
1089
while (text < textend) {
1091
shift = SHIFT[*(text += shift)];
1093
shift = SHIFT[*(text += shift)];
1094
shift = SHIFT[*(text += shift)];
1096
CurrentByteOffset += text - textstart;
1100
HASH = (HASH << 2) + *(text-j);
1104
i = text - textbegin;
1105
if((i - M - D - 10) > Candidate[cdx][1]) {
288
1106
Candidate[++cdx][0] = i-M-D-2;
289
Candidate[cdx][1] = i+M+D; }
290
else Candidate[cdx][1] = i+M+D;
1107
Candidate[cdx][1] = i+M+D;
1109
else Candidate[cdx][1] = i+M+D;
293
1112
else shift = d1;
298
n = textend - textstart;
300
/* for those candidate areas, find the D-error matches */
301
if(Candidate[1][0] < 0) Candidate[1][0] = 0;
302
endpos = endposition; /* the mask table and the endposition */
304
for(round = 0; round <= cdx; round++)
305
{ i = Candidate[round][0] ;
306
if(Candidate[round][1] > n) Candidate[round][1] = n;
309
printf("round: %d, start=%d, end=%d, ", round, i, Candidate[round][1]);
1115
CurrentByteOffset += (textbegin - text);
1117
n = textend - textbegin;
1119
/* for those candidate areas, find the D-error matches */
1120
if(Candidate[1][0] < 0) Candidate[1][0] = 0;
1121
endpos = endposition; /* the mask table and the endposition */
1122
/* Bit1 = (1 << 31); */
1123
Bit1 = (unsigned)0x80000000;
1124
oldbyteoffset = CurrentByteOffset;
1125
for(round = 0; round <= cdx; round++)
1127
i = Candidate[round][0] ;
1128
if(Candidate[round][1] > n) Candidate[round][1] = n;
1130
CurrentByteOffset = oldbyteoffset+i;
1132
R1[1] = R2[1] = ~Bit1;
1133
for(k = 1; k <= D; k++) R1[k] = R2[k] = (R1[k-1] >> 1) & R1[k-1];
1134
while (i < Candidate[round][1])
1137
CurrentByteOffset ++;
1138
if(c == r_newline) {
1139
for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 );
1142
R1[0] = (R2[0] >> 1) | r1;
1144
R1[k] = ((R2[k] >> 1) | r1) & R2[k-1] & ((R1[k-1] & R2[k-1]) >> 1);
1145
if((R1[D] & endpos) == 0) {
1147
if(FILENAMEONLY) return 0;
1150
CurrentByteOffset += lastend - i;
1154
int oldcurrentpos = currentpos;
1155
if (-1 == s_output(text, &currentpos, textbegin, textend, &lastout, pat, M, oldpat, oldM)) return -1;
1156
CurrentByteOffset += currentpos - oldcurrentpos;
1160
for(k=0; k<=D; k++) R1[k] = R2[k] = ~0;
1161
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
1162
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */
1165
/* copying the code to save a few instructions.
1166
you need to understand the shift-or algorithm
1167
to figure this one... */
1170
CurrentByteOffset ++;
1171
if(c == r_newline) {
1172
for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 );
1175
R2[0] = (R1[0] >> 1) | r1;
1176
for(k = 1; k <= D; k++)
1177
R2[k] = ((R1[k] >> 1) | r1) & R1[k-1] & ((R1[k-1] & R2[k-1]) >> 1);
1178
if((R2[D] & endpos) == 0) {
1181
if(FILENAMEONLY) return 0;
1183
CurrentByteOffset += lastend - i;
1187
int oldcurrentpos = currentpos;
1188
if (-1 == s_output(text, &currentpos, textbegin, textend, &lastout, pat, M, oldpat, oldM)) return -1;
1189
CurrentByteOffset += currentpos - oldcurrentpos;
1193
for(k=0; k<=D; k++) R1[k] = R2[k] = ~0;
1194
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
1195
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */
1201
if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) {
1202
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
1203
if (agrep_finalfp != NULL)
1204
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
1206
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
1207
if (newlen + agrep_outpointer >= agrep_outlen) {
1211
agrep_outpointer += newlen;
1215
else { /* NOT TCOMPRESSED */
1216
if (agrep_finalfp != NULL)
1217
fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp);
1219
if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {
1223
memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1);
1224
agrep_outpointer += (textend - lastout + 1);
1232
/* Don't update CurrentByteOffset here: done by caller */
1234
s_output(text, i, textbegin, textend, lastout, pat, m, oldpat, oldm)
1235
int *i; /* in, out */
1237
CHARTYPE *text, *textbegin, *textend, *pat, *oldpat;
1238
CHARTYPE **lastout; /* in, out */
1243
CHARTYPE *curtextbegin;
1244
CHARTYPE *curtextend;
1246
struct timeval initt, finalt;
312
R1[1] = R2[1] = ~Bit1;
313
for(k = 1; k <= D; k++) R1[k] = R2[k] = (R1[k-1] >> 1) & R1[k-1];
314
while (i < Candidate[round][1])
318
for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 );
321
R1[0] = (R2[0] >> 1) | r1;
323
R1[k] = ((R2[k] >> 1) | r1) & R2[k-1] & ((R1[k-1] & R2[k-1]) >> 1);
324
if((R1[D] & endpos) == 0) {
326
if(FILENAMEONLY) { return; }
328
if(i <= lastend) i = lastend;
330
s_output(text, &currentpos);
334
for(k=0; k<=D; k++) R1[k] = R2[k] = ~0;
338
for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 );
341
R2[0] = (R1[0] >> 1) | r1;
342
for(k = 1; k <= D; k++)
343
R2[k] = ((R1[k] >> 1) | r1) & R1[k-1] & ((R1[k-1] & R2[k-1]) >> 1);
344
if((R2[D] & endpos) == 0) { currentpos = i;
346
if(FILENAMEONLY) { return; }
347
if(i <= lastend) i = lastend;
349
s_output(text, &currentpos);
353
for(k=0; k<=D; k++) R1[k] = R2[k] = ~0;
361
int *i; CHARTYPE *text;
366
while(text[*i] != '\n') *i = *i + 1;
369
if(FNAME == ON) printf("%s: ", CurrentFileName);
371
while(text[--bp] != '\n');
372
while(text[++bp] != '\n') putchar(text[bp]);
1249
if(SILENT) return 0;
1250
if (TCOMPRESSED == ON) {
1252
curtextbegin = text + *i; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
1253
if (*curtextbegin == '\n') curtextbegin ++;
1254
curtextend = curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
1255
if (*curtextend == '\n') curtextend ++;
1258
curtextbegin = backward_delimiter(text + *i, text, tc_D_pattern, tc_D_length, OUTTAIL);
1259
curtextend = forward_delimiter(curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */, textend, tc_D_pattern, tc_D_length, OUTTAIL);
1264
curtextbegin = text + *i; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
1265
if (*curtextbegin == '\n') curtextbegin ++;
1266
curtextend = curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
1267
if (*curtextend == '\n') curtextend ++;
1270
curtextbegin = backward_delimiter(text + *i, text, D_pattern, D_length, OUTTAIL);
1271
curtextend = forward_delimiter(curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */, textend, D_pattern, D_length, OUTTAIL);
1275
if (TCOMPRESSED == ON) {
1277
gettimeofday(&initt, NULL);
1278
#endif /*MEASURE_TIMES*/
1279
if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text + *i - curtextbegin + m, EASYSEARCH)) {
1284
gettimeofday(&finalt, NULL);
1285
FILTERALGO_ms += (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
1286
#endif /*MEASURE_TIMES*/
1289
textbegin = curtextend; /*(curtextend - 1 > textbegin ? curtextend - 1 : curtextend); */
1291
*i += textbegin - (text + *i);
1296
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
1297
if (agrep_finalfp != NULL)
1298
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, *lastout, curtextbegin - *lastout, agrep_finalfp, -1, EASYSEARCH);
1300
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, *lastout, curtextbegin - *lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
1301
if (newlen + agrep_outpointer >= agrep_outlen) {
1305
agrep_outpointer += newlen;
1309
CurrentByteOffset += textbegin - text;
1312
else { /* NOT TCOMPRESSED */
1313
if (agrep_finalfp != NULL)
1314
fwrite(*lastout, 1, curtextbegin-*lastout, agrep_finalfp);
1316
if (curtextbegin - *lastout + agrep_outpointer >= agrep_outlen) {
1320
memcpy(agrep_outbuffer+agrep_outpointer, *lastout, curtextbegin-*lastout);
1321
agrep_outpointer += (curtextbegin - *lastout);
1324
CurrentByteOffset += textbegin - text;
1330
if(FNAME && (NEW_FILE || !POST_FILTER)) {
1331
char nextchar = (POST_FILTER == ON)?'\n':' ';
1332
char *prevstring = (POST_FILTER == ON)?"\n":"";
1334
if (agrep_finalfp != NULL)
1335
fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName);
1338
if (prevstring[0] != '\0') {
1339
if(agrep_outpointer + 1 >= agrep_outlen) {
1343
else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
1345
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1346
(CurrentFileName[outindex] != '\0'); outindex++) {
1347
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
1349
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
1353
agrep_outpointer += outindex;
1355
if (PRINTFILETIME) {
1356
char *s = aprint_file_time(CurrentFileTime);
1357
if (agrep_finalfp != NULL)
1358
fprintf(agrep_finalfp, "%s", s);
1361
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1362
(s[outindex] != '\0'); outindex++) {
1363
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
1365
if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
1369
agrep_outpointer += outindex;
1372
if (agrep_finalfp != NULL)
1373
fprintf(agrep_finalfp, ":%c", nextchar);
1375
if (agrep_outpointer+2>= agrep_outlen) {
1380
agrep_outbuffer[agrep_outpointer++] = ':';
1381
agrep_outbuffer[agrep_outpointer++] = nextchar;
1390
if (agrep_finalfp != NULL)
1391
fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);
1395
sprintf(s, "%d= ", CurrentByteOffset);
1396
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1397
(s[outindex] != '\0'); outindex++) {
1398
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
1400
if (s[outindex] != '\0') {
1404
agrep_outpointer += outindex;
1410
if (agrep_finalfp != NULL)
1411
fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text + oldi-curtextbegin), curtextend-curtextbegin);
1415
sprintf(s, "@%d{%d} ", CurrentByteOffset - (text + oldi-curtextbegin), curtextend-curtextbegin);
1416
for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1417
(s[outindex] != '\0'); outindex ++) {
1418
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
1420
if (s[outindex] != '\0') {
1424
agrep_outpointer += outindex;
1430
if (TCOMPRESSED == ON) {
1432
gettimeofday(&initt, NULL);
1433
#endif /*MEASURE_TIMES*/
1434
if (agrep_finalfp != NULL) {
1435
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);
1438
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
1439
if (agrep_outpointer + newlen + 1 >= agrep_outlen) {
1443
agrep_outpointer += newlen;
1447
gettimeofday(&finalt, NULL);
1448
OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
1449
#endif /*MEASURE_TIMES*/
1452
if (agrep_finalfp != NULL) {
1453
fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);
1456
if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {
1460
memcpy(agrep_outbuffer + agrep_outpointer, curtextbegin, curtextend - curtextbegin);
1461
agrep_outpointer += curtextend - curtextbegin;
1466
if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);
1467
else agrep_outbuffer[agrep_outpointer ++] = '\n';
378
1474
prep_bm(Pattern, m)
379
unsigned char *Pattern;
1475
unsigned char *Pattern;
1480
unsigned char lastc;
385
1481
for (i = 0; i < MAXSYM; i++) SHIFT[i] = m;
386
1482
for (i = m-1; i>=0; i--) {
387
1483
hash = TR[Pattern[i]];
388
if(SHIFT[hash] >= m - 1) SHIFT[hash] = m-1-i;
1484
if((int)(SHIFT[hash]) >= (int)(m - 1)) SHIFT[hash] = m-1-i;
1487
/* shift_1 records the previous occurrence of the last character of
1488
the pattern. When we match this last character but do not have a match,
1489
we can shift until we reach the next occurrence from the right. */
391
1490
lastc = TR[Pattern[m-1]];
392
1491
for (i= m-2; i>=0; i--) {
393
1492
if(TR[Pattern[i]] == lastc )
394
{ shift_1 = m-1 - i; i = -1; }
396
if(shift_1 == 0) shift_1 = 1;
397
if(NOUPPER) for(i='A'; i<='Z'; i++) SHIFT[i] = SHIFT[i + 'a' - 'A'];
1498
if(shift_1 == 0) shift_1 = 1; /* can never happen - Udi 11/7/94 */
1499
if(NOUPPER) for(i=0; i<MAXSYM; i++) {
1500
if (isupper(i)) SHIFT[i] = SHIFT[tolower(i)];
1501
/* SHIFT[i] = SHIFT[i + 'a' - 'A']; */
399
for(i='a'; i<='z'; i++) printf("%c: %d", i, SHIFT[i]); printf("\n");
400
for(i='A'; i<='Z'; i++) printf("%c: %d", i, SHIFT[i]); printf("\n");
1504
for(i='a'; i<='z'; i++) printf("%c: %d", i, SHIFT[i]);
1506
for(i='A'; i<='Z'; i++) printf("%c: %d", i, SHIFT[i]);
1511
/* monkey uses two characters for delta_1 shifting */
1513
CHARTYPE SHIFT_2[MAX_SHIFT_2];
1516
monkey( pat, m, text, textend )
1518
register CHARTYPE *text, *textend, *pat;
1521
register unsigned hash;
1522
register CHARTYPE shift;
1524
CHARTYPE *textbegin = text;
1525
CHARTYPE *textstart;
1527
CHARTYPE *curtextbegin;
1528
CHARTYPE *curtextend;
1530
struct timeval initt, finalt;
1532
CHARTYPE *lastout = text;
1536
CurrentByteOffset += m1;
1537
while (text < textend) {
1540
hash = (hash << 3) + TR[*(text-1)];
1541
shift = SHIFT_2[hash];
1543
text = text + shift;
1544
hash = (TR[*text] << 3) + TR[*(text-1)];
1545
shift = SHIFT_2[hash];
1547
CurrentByteOffset += text - textstart;
1549
while(TR[pat[m1 - j]] == TR[*(text - j)]) {
1553
if(text > textend) return 0; /* Udi: used to be >= for some reason */
1554
/* added by Udi 11/7/94 */
1556
/* if(isalnum(*(unsigned char *)(text+1))) goto CONT; --> fixed by SHIOZAKI Takehiko <takehi-s@ascii.co.jp> */
1557
if((text+1 <= textend) && isalnum(*(unsigned char *)(text+1)) && isalnum(*(unsigned char *)text)) {
1558
goto CONT; /* as if there was no match */
1560
/* if(isalnum(*(unsigned char *)(text-m))) goto CONT; --> fixed by SHIOZAKI Takehiko <takehi-s@ascii.co.jp> */
1561
if((textbegin <= (text-m)) && isalnum(*(unsigned char *)(text-m)) && isalnum(*(unsigned char *)(text-m+1))) {
1562
goto CONT; /* as if there was no match */
1564
/* changed by Udi 11/7/94 to avoid having to set TR[] to W_delim */
1567
if (TCOMPRESSED == ON) {
1568
/* Don't update CurrentByteOffset here: only before outputting properly */
1570
curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
1571
if (*curtextbegin == '\n') curtextbegin ++;
1572
curtextend = curtextbegin /*text-m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
1573
if (*curtextend == '\n') curtextend ++;
1576
curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);
1577
curtextend = forward_delimiter(curtextbegin /*text -m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL);
1581
/* Don't update CurrentByteOffset here: only before outputting properly */
1583
curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
1584
if (*curtextbegin == '\n') curtextbegin ++;
1585
curtextend = curtextbegin /*text-m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
1586
if (*curtextend == '\n') curtextend ++;
1589
curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);
1590
curtextend = forward_delimiter(curtextbegin/*text -m*/, textend, D_pattern, D_length, OUTTAIL);
1594
if (TCOMPRESSED == ON) {
1596
gettimeofday(&initt, NULL);
1597
#endif /*MEASURE_TIMES*/
1598
if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text - curtextbegin + m, EASYSEARCH))
1599
goto CONT; /* as if there was no match */
1601
gettimeofday(&finalt, NULL);
1602
FILTERALGO_ms += (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
1603
#endif /*MEASURE_TIMES*/
1606
textbegin = curtextend; /*(curtextend - 1 > textbegin ? curtextend - 1 : curtextend); */
1608
if(FILENAMEONLY) return 0;
1611
if(FNAME && (NEW_FILE || !POST_FILTER)) {
1612
char nextchar = (POST_FILTER == ON)?'\n':' ';
1613
char *prevstring = (POST_FILTER == ON)?"\n":"";
1615
if (agrep_finalfp != NULL)
1616
fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName);
1619
if (prevstring[0] != '\0') {
1620
if(agrep_outpointer + 1 >= agrep_outlen) {
1624
else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
1626
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1627
(CurrentFileName[outindex] != '\0'); outindex++) {
1628
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
1630
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
1634
agrep_outpointer += outindex;
1636
if (PRINTFILETIME) {
1637
char *s = aprint_file_time(CurrentFileTime);
1638
if (agrep_finalfp != NULL)
1639
fprintf(agrep_finalfp, "%s", s);
1642
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1643
(s[outindex] != '\0'); outindex++) {
1644
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
1646
if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
1650
agrep_outpointer += outindex;
1653
if (agrep_finalfp != NULL)
1654
fprintf(agrep_finalfp, ":%c", nextchar);
1656
if (agrep_outpointer+2>= agrep_outlen) {
1661
agrep_outbuffer[agrep_outpointer++] = ':';
1662
agrep_outbuffer[agrep_outpointer++] = nextchar;
1671
if (agrep_finalfp != NULL)
1672
fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);
1676
sprintf(s, "%d= ", CurrentByteOffset);
1677
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1678
(s[outindex] != '\0'); outindex++) {
1679
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
1681
if (s[outindex] != '\0') {
1685
agrep_outpointer += outindex;
1691
if (agrep_finalfp != NULL)
1692
fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
1696
sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
1697
for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1698
(s[outindex] != '\0'); outindex ++) {
1699
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
1701
if (s[outindex] != '\0') {
1705
agrep_outpointer += outindex;
1710
CurrentByteOffset += textbegin - text;
1714
if (TCOMPRESSED == ON) {
1716
gettimeofday(&initt, NULL);
1717
#endif /*MEASURE_TIMES*/
1718
if (agrep_finalfp != NULL)
1719
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);
1721
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
1722
if (agrep_outpointer + newlen + 1 >= agrep_outlen) {
1726
agrep_outpointer += newlen;
1730
gettimeofday(&finalt, NULL);
1731
OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
1732
#endif /*MEASURE_TIMES*/
1735
if (agrep_finalfp != NULL) {
1736
fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);
1739
if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {
1743
memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin);
1744
agrep_outpointer += curtextend - curtextbegin;
1749
if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);
1750
else agrep_outbuffer[agrep_outpointer ++] = '\n';
1754
else { /* INVERSE */
1756
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
1757
if (agrep_finalfp != NULL)
1758
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);
1760
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
1761
if (newlen + agrep_outpointer >= agrep_outlen) {
1765
agrep_outpointer += newlen;
1769
CurrentByteOffset += textbegin - text;
1772
else { /* NOT TCOMPRESSED */
1773
if (agrep_finalfp != NULL)
1774
fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);
1776
if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {
1780
memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);
1781
agrep_outpointer += (curtextbegin - lastout);
1784
CurrentByteOffset += textbegin - text;
1791
CurrentByteOffset += textbegin - text;
1795
/* Counteract the ++ below */
1797
CurrentByteOffset --;
1798
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
1799
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */
1803
CurrentByteOffset ++;
1806
if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) {
1807
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
1808
if (agrep_finalfp != NULL)
1809
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
1811
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
1812
if (newlen + agrep_outpointer >= agrep_outlen) {
1816
agrep_outpointer += newlen;
1820
else { /* NOT TCOMPRESSED */
1821
if (agrep_finalfp != NULL)
1822
fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp);
1824
if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {
1828
memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1);
1829
agrep_outpointer += (textend - lastout + 1);
405
1837
/* a_monkey() the approximate monkey move */
407
1839
a_monkey( pat, m, text, textend, D )
408
register int m, D ; register CHARTYPE *text, *textend, *pat;
1841
register CHARTYPE *text, *textend, *pat;
410
register CHARTYPE *oldtext;
411
register unsigned hash, i, hashmask, suffix_error;
412
register int m1 = m-1-D, j, pos;
1844
register CHARTYPE *oldtext;
1845
CHARTYPE *curtextbegin;
1846
CHARTYPE *curtextend;
1847
register unsigned hash, hashmask, suffix_error;
1848
register int m1 = m-1-D, pos;
1849
CHARTYPE *textbegin = text;
1850
CHARTYPE *textstart;
1851
CHARTYPE *lastout = text;
416
while (text < textend) {
419
while(suffix_error <= D) {
1854
hashmask = Hashmask;
1856
while (text < textend) {
1860
while(suffix_error <= D) {
421
1862
while(MEMBER_1[hash]) {
422
hash = ((hash << LOG_ASCII) + *(text--)) & hashmask;
1863
hash = ((hash << LOG_ASCII) + *(text--)) & hashmask;
426
if(text <= oldtext) {
427
if((pos = verify(m, 2*m+D, D, pat, oldtext)) > 0) {
1867
CurrentByteOffset += text - textstart;
1868
if(text <= oldtext) {
1869
if((pos = verify(m, 2*m+D, D, pat, oldtext)) > 0) {
1870
CurrentByteOffset += (oldtext+pos - text);
428
1871
text = oldtext+pos;
429
if(text > textend) return;
1872
if(text > textend) return 0;
1874
/* Don't update CurrentByteOffset here: only before outputting properly */
1875
if (TCOMPRESSED == ON) {
1877
curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
1878
if (*curtextbegin == '\n') curtextbegin ++;
1879
curtextend = curtextbegin /*text -m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
1880
if (*curtextend == '\n') curtextend ++;
1883
curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);
1884
curtextend = forward_delimiter(curtextbegin /*text -m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL);
1889
curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
1890
if (*curtextbegin == '\n') curtextbegin ++;
1891
curtextend = curtextbegin/*text -m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
1892
if (*curtextend == '\n') curtextend ++;
1895
curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);
1896
curtextend = forward_delimiter(curtextbegin/*text -m*/, textend, D_pattern, D_length, OUTTAIL);
1899
textbegin = curtextend; /* (curtextend - 1 > textbegin ? curtextend - 1 : curtextend); */
430
1901
num_of_matched++;
431
if(FILENAMEONLY) return;
433
if(FNAME) printf("%s: ", CurrentFileName);
434
while(*(--text) != '\n');
435
while(*(++text) != '\n') putchar(*text);
439
while(*text != '\n') text++;
450
/* monkey uses two characters for delta_1 shifting */
452
CHARTYPE SHIFT_2[MAX_SHIFT_2];
454
monkey( pat, m, text, textend )
455
register int m ; register CHARTYPE *text, *textend, *pat;
457
register unsigned hash, i;
458
register CHARTYPE shift;
460
register unsigned r_newline;
466
while (text < textend) {
468
hash = (hash << 3) + *(text-1);
469
shift = SHIFT_2[hash];
472
hash = (*text << 3) + *(text-1);
473
shift = SHIFT_2[hash];
476
while(TR[pat[m1 - j]] == TR[*(text - j)]) { if(++j == m) break; }
478
if(text >= textend) return;
480
if(FILENAMEONLY) return;
482
while (*text != r_newline) text++;
486
if(FNAME) printf("%s: ", CurrentFileName);
487
while(*(--text) != r_newline);
488
while(*(++text) != r_newline) putchar(*text);
1902
if(FILENAMEONLY) return 0;
1905
if(FNAME && (NEW_FILE || !POST_FILTER)) {
1906
char nextchar = (POST_FILTER == ON)?'\n':' ';
1907
char *prevstring = (POST_FILTER == ON)?"\n":"";
1909
if (agrep_finalfp != NULL)
1910
fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName);
1913
if (prevstring[0] != '\0') {
1914
if(agrep_outpointer + 1 >= agrep_outlen) {
1918
else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
1920
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1921
(CurrentFileName[outindex] != '\0'); outindex++) {
1922
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
1924
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
1928
agrep_outpointer += outindex;
1930
if (PRINTFILETIME) {
1931
char *s = aprint_file_time(CurrentFileTime);
1932
if (agrep_finalfp != NULL)
1933
fprintf(agrep_finalfp, "%s", s);
1936
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1937
(s[outindex] != '\0'); outindex++) {
1938
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
1940
if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
1944
agrep_outpointer += outindex;
1947
if (agrep_finalfp != NULL)
1948
fprintf(agrep_finalfp, ":%c", nextchar);
1950
if (agrep_outpointer+2>= agrep_outlen) {
1955
agrep_outbuffer[agrep_outpointer++] = ':';
1956
agrep_outbuffer[agrep_outpointer++] = nextchar;
1965
if (agrep_finalfp != NULL)
1966
fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);
1970
sprintf(s, "%d= ", CurrentByteOffset);
1971
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1972
(s[outindex] != '\0'); outindex++) {
1973
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
1975
if (s[outindex] != '\0') {
1979
agrep_outpointer += outindex;
1985
if (agrep_finalfp != NULL)
1986
fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
1990
sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
1991
for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
1992
(s[outindex] != '\0'); outindex ++) {
1993
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
1995
if (s[outindex] != '\0') {
1999
agrep_outpointer += outindex;
2004
CurrentByteOffset += textbegin - text;
2008
if (TCOMPRESSED == ON) {
2010
gettimeofday(&initt, NULL);
2011
#endif /*MEASURE_TIMES*/
2012
if (agrep_finalfp != NULL)
2013
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);
2015
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
2016
if (agrep_outpointer + newlen + 1 >= agrep_outlen) {
2020
agrep_outpointer += newlen;
2024
gettimeofday(&finalt, NULL);
2025
OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
2026
#endif /*MEASURE_TIMES*/
2029
if (agrep_finalfp != NULL) {
2030
fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);
2033
if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {
2037
memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin);
2038
agrep_outpointer += curtextend - curtextbegin;
2043
if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);
2044
else agrep_outbuffer[agrep_outpointer ++] = '\n';
2048
else { /* INVERSE */
2050
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
2051
if (agrep_finalfp != NULL)
2052
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);
2054
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
2055
if (newlen + agrep_outpointer >= agrep_outlen) {
2059
agrep_outpointer += newlen;
2063
CurrentByteOffset += textbegin - text;
2066
else { /* NOT TCOMPRESSED */
2067
if (agrep_finalfp != NULL)
2068
fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);
2070
if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {
2074
memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);
2075
agrep_outpointer += (curtextbegin - lastout);
2078
CurrentByteOffset += textbegin - text;
2085
CurrentByteOffset += textbegin - text;
2088
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
2089
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */
2092
CurrentByteOffset += (oldtext + m - text);
2099
if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) {
2100
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
2101
if (agrep_finalfp != NULL)
2102
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
2104
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
2105
if (newlen + agrep_outpointer >= agrep_outlen) {
2109
agrep_outpointer += newlen;
2113
else { /* NOT TCOMPRESSED */
2114
if (agrep_finalfp != NULL)
2115
fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp);
2117
if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {
2121
memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1);
2122
agrep_outpointer += (textend - lastout + 1);
497
2131
am_preprocess(Pattern)
498
2132
CHARTYPE *Pattern;
502
2135
m = strlen(Pattern);
503
2136
for (i = 1, Hashmask = 1 ; i<16 ; i++) Hashmask = (Hashmask << 1) + 1 ;
504
2137
for (i = 0; i < MAXMEMBER_1; i++) MEMBER_1[i] = 0;
505
2138
for (i = m-1; i>=0; i--) {
506
2139
MEMBER_1[Pattern[i]] = 1;
508
2141
for (i = m-1; i > 0; i--) {
509
MEMBER_1[(Pattern[i] << LOG_ASCII) + Pattern[i-1]] = 1;
2142
MEMBER_1[(Pattern[i] << LOG_ASCII) + Pattern[i-1]] = 1;
514
2147
verify(m, n, D, pat, text)
515
2148
register int m, n, D;
516
2149
CHARTYPE *pat, *text;
518
int A[MAXPATT], B[MAXPATT];
519
register int last = D;
520
register int cost = 0;
521
register int k, i, c;
522
register int m1 = m+1;
523
CHARTYPE *textend = text+n;
524
CHARTYPE *textbegin = text;
2151
int A[MAXPATT], B[MAXPATT];
2152
register int last = D;
2153
register int cost = 0;
2154
register int k, i, c;
2155
register int m1 = m+1;
2156
CHARTYPE *textend = text+n;
2157
CHARTYPE *textbegin = text;
526
for (i = 0; i <= m1; i++) A[i] = B[i] = i;
527
while (text < textend)
529
for (k = 1; k <= last; k++)
532
if (pat[k-1] != *text)
533
{ if (B[k]+1 < cost) cost = B[k]+1;
534
if (A[k-1]+1 < cost) cost = A[k-1]+1; }
538
if(pat[last] == *text++) { A[last+1] = B[last]; last++; }
539
if(A[last] < D) A[last+1] = A[last++]+1;
540
while (A[last] > D) last = last - 1;
541
if(last >= m) return(text - textbegin - 1);
544
for(c = 0; c<=m1; c++) A[c] = B[c] = c;
546
for (k = 1; k <= last; k++)
549
if (pat[k-1] != *text)
550
{ if (A[k]+1 < cost) cost = A[k]+1;
551
if (B[k-1]+1 < cost) cost = B[k-1]+1; }
555
if(pat[last] == *text++) { B[last+1] = A[last]; last++; }
556
if(B[last] < D) B[last+1] = B[last++]+1;
557
while (B[last] > D) last = last -1;
558
if(last >= m) return(text - textbegin - 1);
561
for(c = 0; c<=m1; c++) A[c] = B[c] = c;
2159
for (i = 0; i <= m1; i++) A[i] = B[i] = i;
2160
while (text < textend)
2162
for (k = 1; k <= last; k++)
2165
if (pat[k-1] != *text)
2167
if (B[k]+1 < cost) cost = B[k]+1;
2168
if (A[k-1]+1 < cost) cost = A[k-1]+1;
2170
else cost = cost -1;
2173
if(pat[last] == *text++) {
2174
A[last+1] = B[last];
2177
if(A[last] < D) A[last+1] = A[last++]+1;
2178
while (A[last] > D) last = last - 1;
2179
if(last >= m) return(text - textbegin - 1);
2182
for(c = 0; c<=m1; c++) A[c] = B[c] = c;
2184
for (k = 1; k <= last; k++)
2187
if (pat[k-1] != *text)
2189
if (A[k]+1 < cost) cost = A[k]+1;
2190
if (B[k-1]+1 < cost) cost = B[k-1]+1;
2192
else cost = cost -1;
2195
if(pat[last] == *text++) {
2196
B[last+1] = A[last];
2199
if(B[last] < D) B[last+1] = B[last++]+1;
2200
while (B[last] > D) last = last -1;
2201
if(last >= m) return(text - textbegin - 1);
2204
for(c = 0; c<=m1; c++) A[c] = B[c] = c;
567
2210
/* preprocessing for monkey() */
569
2212
m_preprocess(Pattern)
570
2213
CHARTYPE *Pattern;
574
2217
m = strlen(Pattern);
575
2218
for (i = 0; i < MAX_SHIFT_2; i++) SHIFT_2[i] = m;
576
2219
for (i = m-1; i>=1; i--) {
2220
hash = TR[Pattern[i]];
578
2221
hash = hash << 3;
579
2222
for (j = 0; j< MAXSYM; j++) {
580
2223
if(SHIFT_2[hash+j] == m) SHIFT_2[hash+j] = m-1;
582
hash = hash + Pattern[i-1];
583
if(SHIFT_2[hash] >= m - 1) SHIFT_2[hash] = m-1-i;
2225
hash = hash + TR[Pattern[i-1]];
2226
if((int)(SHIFT_2[hash]) >= (int)(m - 1)) SHIFT_2[hash] = m-1-i;
586
2229
for (i= m-2; i>=0; i--) {
587
if(Pattern[i] == Pattern[m-1] )
588
{ shift_1 = m-1 - i; i = -1; }
2230
if(TR[Pattern[i]] == TR[Pattern[m-1]] )
590
2236
if(shift_1 == 0) shift_1 = 1;
594
2240
/* monkey4() the approximate monkey move */
2242
char *MEMBER_D = NULL;
598
2245
monkey4( pat, m, text, textend, D )
599
register int m, D ; register unsigned char *text, *pat, *textend;
2247
register unsigned char *text, *pat, *textend;
601
register unsigned char *oldtext;
602
register unsigned hash, i, hashmask, suffix_error;
603
register int m1=m-1-D, j, pos;
2250
register unsigned char *oldtext;
2251
register unsigned hash, hashmask, suffix_error;
2252
register int m1=m-1-D, pos;
2253
CHARTYPE *textbegin = text;
2254
CHARTYPE *textstart;
2255
CHARTYPE *curtextbegin;
2256
CHARTYPE *curtextend;
2257
CHARTYPE *lastout = text;
607
while (text < textend) {
610
while(suffix_error <= D) {
611
hash = char_map[*text--];
612
hash = ((hash << LOG_DNA) + char_map[*(text--)]) & hashmask;
613
while(MEMBER_D[hash]) {
614
hash = ((hash << LOG_DNA) + char_map[*(text--)]) & hashmask;
618
if(text <= oldtext) {
619
if((pos = verify(m, 2*m+D, D, pat, oldtext)) > 0) {
2260
hashmask = Hashmask;
2262
while (text < textend) {
2266
while(suffix_error <= D) {
2267
hash = char_map[*text--];
2268
hash = ((hash << LOG_DNA) + char_map[*(text--)]) & hashmask;
2269
while(MEMBER_D[hash]) {
2270
hash = ((hash << LOG_DNA) + char_map[*(text--)]) & hashmask;
2274
CurrentByteOffset += text - textstart;
2275
if(text <= oldtext) {
2276
if((pos = verify(m, 2*m+D, D, pat, oldtext)) > 0) {
2277
CurrentByteOffset += (oldtext+pos - text);
620
2278
text = oldtext+pos;
621
if(text > textend) return;
2279
if(text > textend) return 0;
2281
if (TCOMPRESSED == ON) {
2282
/* Don't update CurrentByteOffset here: only before outputting properly */
2284
curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
2285
if (*curtextbegin == '\n') curtextbegin ++;
2286
curtextend = curtextbegin /*text -m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
2287
if (*curtextend == '\n') curtextend ++;
2290
curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);
2291
curtextend = forward_delimiter(curtextbegin/*text -m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL);
2295
/* Don't update CurrentByteOffset here: only before outputting properly */
2297
curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
2298
if (*curtextbegin == '\n') curtextbegin ++;
2299
curtextend = curtextbegin /*text -m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
2300
if (*curtextend == '\n') curtextend ++;
2303
curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);
2304
curtextend = forward_delimiter(curtextbegin/*text -m*/, textend, D_pattern, D_length, OUTTAIL);
2307
textbegin = curtextend; /*(curtextend - 1 > textbegin ? curtextend - 1 : curtextend); */
622
2309
num_of_matched++;
623
if(FILENAMEONLY) return;
625
if(FNAME) printf("%s:", CurrentFileName);
626
while(*(--text) != '\n');
627
while(*(++text) != '\n') putchar(*text);
632
while(*text != '\n') text++;
636
else text = oldtext + m;
2310
if(FILENAMEONLY) return 0;
2313
if(FNAME && (NEW_FILE || !POST_FILTER)) {
2314
char nextchar = (POST_FILTER == ON)?'\n':' ';
2315
char *prevstring = (POST_FILTER == ON)?"\n":"";
2317
if (agrep_finalfp != NULL)
2318
fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName);
2321
if (prevstring[0] != '\0') {
2322
if(agrep_outpointer + 1 >= agrep_outlen) {
2326
else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
2328
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
2329
(CurrentFileName[outindex] != '\0'); outindex++) {
2330
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
2332
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
2336
agrep_outpointer += outindex;
2338
if (PRINTFILETIME) {
2339
char *s = aprint_file_time(CurrentFileTime);
2340
if (agrep_finalfp != NULL)
2341
fprintf(agrep_finalfp, "%s", s);
2344
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
2345
(s[outindex] != '\0'); outindex++) {
2346
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
2348
if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {
2352
agrep_outpointer += outindex;
2355
if (agrep_finalfp != NULL)
2356
fprintf(agrep_finalfp, ":%c", nextchar);
2358
if (agrep_outpointer+2>= agrep_outlen) {
2363
agrep_outbuffer[agrep_outpointer++] = ':';
2364
agrep_outbuffer[agrep_outpointer++] = nextchar;
2373
if (agrep_finalfp != NULL)
2374
fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);
2378
sprintf(s, "%d= ", CurrentByteOffset);
2379
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
2380
(s[outindex] != '\0'); outindex++) {
2381
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
2383
if (s[outindex] != '\0') {
2387
agrep_outpointer += outindex;
2393
if (agrep_finalfp != NULL)
2394
fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
2398
sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
2399
for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
2400
(s[outindex] != '\0'); outindex ++) {
2401
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
2403
if (s[outindex] != '\0') {
2407
agrep_outpointer += outindex;
2412
CurrentByteOffset += textbegin + 1 - text;
2413
text = textbegin + 1;
2416
if (TCOMPRESSED == ON) {
2418
gettimeofday(&initt, NULL);
2419
#endif /*MEASURE_TIMES*/
2420
if (agrep_finalfp != NULL)
2421
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);
2423
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
2424
if (agrep_outpointer + newlen + 1 >= agrep_outlen) {
2428
agrep_outpointer += newlen;
2432
gettimeofday(&finalt, NULL);
2433
OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
2434
#endif /*MEASURE_TIMES*/
2437
if (agrep_finalfp != NULL) {
2438
fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);
2441
if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {
2445
memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin);
2446
agrep_outpointer += curtextend - curtextbegin;
2451
if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);
2452
else agrep_outbuffer[agrep_outpointer ++] = '\n';
2456
else { /* INVERSE */
2458
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
2459
if (agrep_finalfp != NULL)
2460
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);
2462
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
2463
if (newlen + agrep_outpointer >= agrep_outlen) {
2467
agrep_outpointer += newlen;
2471
CurrentByteOffset += textbegin + 1 - text;
2472
text = textbegin + 1;
2474
else { /* NOT TCOMPRESSED */
2475
if (agrep_finalfp != NULL)
2476
fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);
2478
if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {
2482
memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);
2483
agrep_outpointer += (curtextbegin - lastout);
2486
CurrentByteOffset += textbegin + 1 - text;
2487
text = textbegin + 1;
2493
CurrentByteOffset += textbegin + 1 - text;
2494
text = textbegin + 1 ;
2496
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
2497
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */
2499
else { CurrentByteOffset += (oldtext + m - text); text = oldtext + m; }
2504
if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) {
2505
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
2506
if (agrep_finalfp != NULL)
2507
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
2509
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
2510
if (newlen + agrep_outpointer >= agrep_outlen) {
2514
agrep_outpointer += newlen;
2518
else { /* NOT TCOMPRESSED */
2519
if (agrep_finalfp != NULL)
2520
fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp);
2522
if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {
2526
memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1);
2527
agrep_outpointer += (textend - lastout + 1);
642
2536
/*
 * prep4 - preprocessing step for DNA patterns (the caller invokes it as
 * prep4(pat, m) when the DNA option is set).
 *
 *   Pattern  the pattern text
 *   m        length of the pattern
 *
 * Visible steps: reset char_map and assign small codes to the nucleotide
 * letters, derive BSize from blog(4, m), build Hashmask as a run of
 * BSize*LOG_DNA one-bits, (re)allocate and clear MEMBER_D with Hashmask+1
 * entries, then hash substrings of Pattern of length 1..BSize.
 *
 * Fixes applied here:
 *   - The g/t/c/n rows assigned the lower-case index twice, so upper-case
 *     G/T/C/N kept code 0 (same as any non-DNA byte) while 'A' was mapped.
 *     All five letters are now mapped in both cases, matching the 'a'/'A' row.
 *   - Pattern bytes are cast to unsigned char before indexing char_map so a
 *     byte >= 0x80 cannot produce a negative index where char is signed.
 *
 * NOTE(review): the bare numeric lines and the repeated statements below look
 * like residue of two revisions of this routine merged by an extraction step;
 * they are left untouched here - confirm against the real file.
 * NOTE(review): how `hash` is consumed after the inner loop (presumably to
 * mark MEMBER_D) is not visible in this span - verify.
 */
prep4(Pattern, m)
643
char *Pattern; int m;
648
for(i=0; i< MAXSYM; i++) char_map[i] = 0;
649
char_map['a'] = char_map['A'] = 4;
650
char_map['g'] = char_map['G'] = 1;
651
char_map['t'] = char_map['T'] = 2;
652
char_map['c'] = char_map['C'] = 3;
653
char_map['n'] = char_map['N'] = 5;
2543
for(i=0; i< MAXSYM; i++) char_map[i] = 0;
2544
char_map['a'] = char_map['A'] = 4;
2545
char_map['g'] = char_map['G'] = 1;
2546
char_map['t'] = char_map['T'] = 2;
2547
char_map['c'] = char_map['C'] = 3;
2548
char_map['n'] = char_map['N'] = 5;
655
2550
BSize = blog(4, m);
656
for (i = 1, Hashmask = 1 ; i<BSize*LOG_DNA; i++) Hashmask = (Hashmask << 1) + 1 ;
2551
for (i = 1, Hashmask = 1 ; i<(int)(BSize*LOG_DNA); i++) Hashmask = (Hashmask << 1) + 1 ;
2552
if (MEMBER_D != NULL) free(MEMBER_D);
657
2553
MEMBER_D = (char *) malloc((Hashmask+1) * sizeof(char));
659
2555
printf("BSize = %d", BSize);
661
2557
for (i=0; i <= Hashmask; i++) MEMBER_D[i] = 0;
662
for (j=0; j < BSize; j++) {
663
for(i=m-1; i >= j; i--) {
665
for(k=0; k <= j; k++)
666
hash = (hash << LOG_DNA) +char_map[(unsigned char)Pattern[i-k]];
2558
for (j=0; j < (int)BSize; j++) {
2559
for(i=m-1; i >= j; i--) {
2561
for(k=0; k <= j; k++)
2562
hash = (hash << LOG_DNA) +char_map[(unsigned char)Pattern[i-k]];
668
printf("< %d >, ", hash);
2564
printf("< %d >, ", hash);
681
2578
for (i = 1; exp < m; i++) exp = exp * base;