6
6
/* process start-of-line and end-of-line symbols */
7
7
/* process -w WORDBOUND option: append special symbol at the beginning and end of the pattern */
8
8
/* process -d option before this routine */
9
/* the delimiter pattern is in D_pattern (need to end with '; ') */
9
/* the delimiter pattern is in D_pattern (need to end with '; ') */
10
10
/* if '-t' (suggestion: how about -B) the pattern is passed to sgrep */
11
11
/* and doesn't go here */
12
12
/* in that case, -d is ignored? or not necessary */
13
13
/* upon return, Pattern contains the pattern to be processed by maskgen */
14
14
/* D_pattern contains transformed D_pattern */
18
extern int SIMPLEPATTERN, WHOLELINE, REGEX, RE_ERR, DELIMITER, TAIL, WORDBOUND;
19
extern int PAT_FILE, PAT_BUFFER;
20
extern ParseTree *AParse;
21
extern int WHOLELINE, REGEX, FASTREGEX, RE_ERR, DELIMITER, TAIL, WORDBOUND;
20
23
extern CHAR Progname[];
24
extern int D_length, tc_D_length;
25
extern CHAR tc_D_pattern[MaxDelimit * 2];
22
26
extern int table[WORD][WORD];
27
extern int agrep_initialfd;
28
extern int EXITONERROR;
32
extern char *multibuf;
34
extern int anum_terminals;
35
extern ParseTree aterminals[MAXNUM_PAT];
36
extern char FREQ_FILE[MAX_LINE_LEN], HASH_FILE[MAX_LINE_LEN], STRING_FILE[MAX_LINE_LEN]; /* interfacing with tcompress */
37
extern int AComplexBoolean;
24
40
preprocess(D_pattern, Pattern) /* need two parameters */
25
CHAR *D_pattern, *Pattern;
41
CHAR D_pattern[], Pattern[];
27
CHAR temp[Maxline], *r_pat, *old_pat; /* r_pat for r.e. */
28
CHAR old_D_pat[MaxDelimit];
29
int i, j=0, rp=0, m, t=0, partitions, num_pos, ANDON = 0;
31
int IN_RANGE=0, EVEN=0, OR_AND=0;
32
old_pat = Pattern; /* to remember the starting position */
35
if(Pattern[i] == '\\') i++;
36
else if(Pattern[i] == '|' || Pattern[i] == '*' ) REGEX = ON;
38
r_pat = (CHAR *) malloc(strlen(Pattern)+2*strlen(D_pattern));
39
strcpy(temp, D_pattern);
40
d_end = t = strlen(temp); /* size of D_pattern, including '; ' */
41
if (WHOLELINE) { temp[t++] = LANGLE;
45
strcat(temp, Pattern);
52
if (WORDBOUND) { temp[t++] = LANGLE;
56
strcat(temp, Pattern);
58
if (WORDBOUND) { temp[m++] = LANGLE;
63
/* now temp contains the augmented pattern, m is its size */
66
for (i=0, j=0; i< d_end-2; i++) {
70
Pattern[j++] = temp[i];
71
old_D_pat[D_length++] = temp[i];
73
case '<' : Pattern[j++] = LANGLE;
75
case '>' : Pattern[j++] = RANGLE;
77
case '^' : Pattern[j++] = '\n';
78
old_D_pat[D_length++] = temp[i];
80
case '$' : Pattern[j++] = '\n';
81
old_D_pat[D_length++] = temp[i];
83
default : Pattern[j++] = temp[i];
84
old_D_pat[D_length++] = temp[i];
88
if(D_length > MAXDELIM) {
89
fprintf(stderr, "%s: delimiter pattern too long\n", Progname);
92
Pattern[j++] = ANDPAT;
93
old_D_pat[D_length] = '\0';
94
strcpy(D_pattern, old_D_pat);
102
r_pat[rp++] = '.'; /* if REGEX: always append '.' in front */
104
Pattern[j++] = NOCARE;
107
for (i=d_end; i < m ; i++)
111
case '\\': i++; Pattern[j++] = temp[i];
112
r_pat[rp++] = 'o'; /* the symbol doesn't matter */
114
case '#': if(REGEX) {
115
Pattern[j++] = NOCARE;
119
Pattern[j++] = WILDCD;
121
case '(': Pattern[j++] = LPARENT;
124
case ')': Pattern[j++] = RPARENT;
127
case '[': Pattern[j++] = LRANGE;
131
case ']': Pattern[j++] = RRANGE;
135
case '<': Pattern[j++] = LANGLE;
137
case '>': Pattern[j++] = RANGLE;
139
case '^': if (temp[i-1] == '[') Pattern[j++] = NOTSYM;
140
else Pattern[j++] = '\n';
143
case '$': Pattern[j++] = '\n';
146
case '.': Pattern[j++] = NOCARE;
149
case '*': Pattern[j++] = STAR;
152
case '|': Pattern[j++] = ORSYM;
155
case ',': Pattern[j++] = ORPAT;
158
case ';': if(ANDON) RE_ERR = ON;
159
Pattern[j++] = ANDPAT;
162
case '-': if(IN_RANGE) {
163
Pattern[j++] = HYPHEN;
167
Pattern[j++] = temp[i];
168
r_pat[rp++] = temp[i];
172
Pattern[j++] = temp[i];
175
default: Pattern[j++] = temp[i];
176
r_pat[rp++] = temp[i];
180
if(REGEX) { /* append ').' at end of regular expression */
183
Pattern[j++] = NOCARE;
191
if(DELIMITER || WORDBOUND) {
192
fprintf(stderr, "%s: -d or -w option is not supported for this pattern\n", Progname);
196
fprintf(stderr, "%s: illegal regular expression\n", Progname);
199
while(*Pattern != NOCARE && m-- > 0) Pattern++; /* poit to . */
200
num_pos = init(r_pat, table);
202
fprintf(stderr, "%s: illegal regular expression\n", Progname);
206
fprintf(stderr, "%s: regular expression too long\n", Progname);
209
strcpy(old_pat, Pattern); /* do real change to the Pattern to be returned */
43
CHAR temp[Maxline], *r_pat, *old_pat; /* r_pat for r.e. */
44
CHAR old_D_pat[MaxDelimit*2];
45
int i, j=0, rp=0, m, t=0, num_pos, ANDON = 0;
51
fprintf(stderr, "preprocess: m=%d, pat=%s, PAT_FILE=%d, PAT_BUFFER=%d\n", strlen(Pattern), Pattern, PAT_FILE, PAT_BUFFER);
53
if ((m = strlen(Pattern)) <= 0) return 0;
54
if (PAT_FILE || PAT_BUFFER) return 0;
57
old_pat = Pattern; /* to remember the starting position */
59
/* Check if pattern is a concatenation of ands OR ors of simple patterns */
60
multibuf = (char *)malloc(m * 2 + 2); /* worst case: a,a,a,a,a,a */
61
if (multibuf == NULL) goto normal_processing;
62
/* if (WORDBOUND) goto normal_processing; */
67
if (((ret1 = asplit_pattern(Pattern, m, aterminals, &anum_terminals, &AParse)) <= 0) || /* can change the pattern if simple boolean with {} */
68
((ret2 = asplit_terminal(0, anum_terminals, multibuf, &multilen)) <= 0) ||
69
((ret2 == 1) && !(aterminals[0].op & NOTPAT))) { /* must do normal processing */
70
if (AComplexBoolean && (AParse != NULL)) destroy_tree(AParse); /* so that direct exec invocations don't use AParse by mistake! */
72
fprintf(stderr, "preprocess: split_pat = %d, split_term = %d, #terms = %d\n", ret1, ret2, anum_terminals);
76
strcpy(Pattern, aterminals[0].data.leaf.value);
85
goto normal_processing;
88
/* This is quick processing */
89
if (AParse != 0) { /* successfully converted to ANDPAT/ORPAT */
91
/* printf("preprocess(): converted= %d, patterns= %s", AParse, multibuf); */
92
/* Now I have to process the delimiter if any */
94
/* D_pattern is "<PAT>; ", D_length is 1 + length of string PAT: see agrep.c/'d' */
95
preprocess_delimiter(D_pattern+1, D_length - 1, D_pattern, &D_length);
96
/* D_pattern is the exact stuff we want to match, D_length is its strlen */
97
if ((tc_D_length = quick_tcompress(FREQ_FILE, HASH_FILE, D_pattern, D_length, tc_D_pattern, MaxDelimit*2, TC_EASYSEARCH)) <= 0) {
98
strcpy(tc_D_pattern, D_pattern);
99
tc_D_length = D_length;
101
/* printf("mgrep's delim=%s,%d tc_delim=%s,%d\n", D_pattern, D_length, tc_D_pattern, tc_D_length); */
105
/* else either unknown character, one simple pattern or none at all */
108
for(i=0; i< m; i++) {
109
if(Pattern[i] == '\\') i++;
110
else if(Pattern[i] == '|' || Pattern[i] == '*') REGEX = ON;
113
r_pat = (CHAR *) malloc(strlen(Pattern)+2*strlen(D_pattern) + 8); /* bug-report, From: Chris Dalton <crd@hplb.hpl.hp.com> */
114
strcpy(temp, D_pattern);
115
d_end = t = strlen(temp); /* size of D_pattern, including '; ' */
121
strcat(temp, Pattern);
135
strcat(temp, Pattern);
144
/* now temp contains the augmented pattern, m is its size */
146
for (i=0, j=0; i< d_end-2; i++) {
151
Pattern[j++] = temp[i];
152
old_D_pat[D_length++] = temp[i];
155
Pattern[j++] = LANGLE;
158
Pattern[j++] = RANGLE;
162
old_D_pat[D_length++] = temp[i];
166
old_D_pat[D_length++] = temp[i];
169
Pattern[j++] = temp[i];
170
old_D_pat[D_length++] = temp[i];
174
if(D_length > MAXDELIM) {
175
fprintf(stderr, "%s: delimiter pattern too long (has > %d chars)\n", Progname, MAXDELIM);
184
Pattern[j++] = ANDPAT;
185
old_D_pat[D_length] = '\0';
186
strcpy(D_pattern, old_D_pat);
194
r_pat[rp++] = '.'; /* if REGEX: always append '.' in front */
196
Pattern[j++] = NOCARE;
199
for (i=d_end; i < m ; i++)
205
Pattern[j++] = temp[i];
206
r_pat[rp++] = 'o'; /* the symbol doesn't matter */
211
Pattern[j++] = NOCARE;
216
Pattern[j++] = WILDCD;
219
Pattern[j++] = LPARENT;
223
Pattern[j++] = RPARENT;
227
Pattern[j++] = LRANGE;
232
Pattern[j++] = RRANGE;
237
Pattern[j++] = LANGLE;
240
Pattern[j++] = RANGLE;
243
if (temp[i-1] == '[') Pattern[j++] = NOTSYM;
244
else Pattern[j++] = '\n';
252
Pattern[j++] = NOCARE;
260
Pattern[j++] = ORSYM;
264
Pattern[j++] = ORPAT;
268
if(ANDON) RE_ERR = ON;
269
Pattern[j++] = ANDPAT;
274
Pattern[j++] = HYPHEN;
278
Pattern[j++] = temp[i];
279
r_pat[rp++] = temp[i];
283
Pattern[j++] = temp[i];
287
Pattern[j++] = temp[i];
288
r_pat[rp++] = temp[i];
292
if(REGEX) { /* append ').' at end of regular expression */
295
Pattern[j++] = NOCARE;
303
if(DELIMITER || WORDBOUND) {
304
fprintf(stderr, "%s: -d or -w option is not supported for this pattern\n", Progname);
313
fprintf(stderr, "%s: illegal regular expression\n", Progname);
321
while(*Pattern != NOCARE && m-- > 0) Pattern++; /* poit to . */
322
num_pos = init(r_pat, table);
324
fprintf(stderr, "%s: illegal regular expression\n", Progname);
333
fprintf(stderr, "%s: regular expression too long\n", Progname);
341
strcpy(old_pat, Pattern); /* do real change to the Pattern to be returned */