1
/*************************************************
2
* PCRE DEMONSTRATION PROGRAM *
3
*************************************************/
5
/* This is a demonstration program to illustrate the most straightforward ways
6
of calling the PCRE regular expression library from a C program. See the
7
pcresample documentation for a short discussion. It can be compiled with:
10
gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
11
-R/usr/local/lib -lpcre
13
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
14
library files for PCRE are installed on your system. Only some operating
15
systems (e.g. Solaris) use the -R option. */
23
#define OVECCOUNT 30 /* should be a multiple of 3 */
26
int main(int argc, char **argv)
32
unsigned char *name_table;
37
int ovector[OVECCOUNT];
42
/**************************************************************************
43
* First, sort out the command line. There is only one possible option at *
44
* the moment, "-g" to request repeated matching to find all occurrences, *
45
* like Perl's /g option. We set the variable find_all to a non-zero value *
46
* if the -g option is present. Apart from that, there must be exactly two *
* arguments: the pattern and the subject string. *
48
**************************************************************************/
51
for (i = 1; i < argc; i++)
53
if (strcmp(argv[i], "-g") == 0) find_all = 1;
57
/* After the options, we require exactly two arguments, which are the pattern,
58
and the subject string. */
62
printf("Two arguments required: a regex and a subject string\n");
68
subject_length = (int)strlen(subject);
71
/*************************************************************************
72
* Now we are going to compile the regular expression pattern, and handle *
73
* any errors that are detected. *
74
*************************************************************************/
77
pattern, /* the pattern */
78
0, /* default options */
79
&error, /* for error message */
80
&erroffset, /* for error offset */
81
NULL); /* use default character tables */
83
/* Compilation failed: print the error message and exit */
87
printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
92
/*************************************************************************
93
* If the compilation succeeded, we call PCRE again, in order to do a *
94
* pattern match against the subject string. This does just ONE match. If *
95
* further matching is needed, it will be done below. *
96
*************************************************************************/
99
re, /* the compiled pattern */
100
NULL, /* no extra data - we didn't study the pattern */
101
subject, /* the subject string */
102
subject_length, /* the length of the subject */
103
0, /* start at offset 0 in the subject */
104
0, /* default options */
105
ovector, /* output vector for substring information */
106
OVECCOUNT); /* number of elements in the output vector */
108
/* Matching failed: handle error cases */
114
case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
116
/* Handle other special cases if you like */
118
default: printf("Matching error %d\n", rc); break;
120
free(re); /* Release memory used for the compiled pattern */
126
printf("\nMatch succeeded at offset %d\n", ovector[0]);
129
/*************************************************************************
130
* We have found the first match within the subject string. If the output *
131
* vector wasn't big enough, set its size to the maximum. Then output any *
132
* substrings that were captured. *
133
*************************************************************************/
135
/* The output vector wasn't big enough */
140
printf("ovector only has room for %d captured substrings\n", rc - 1);
143
/* Show substrings stored in the output vector by number. Obviously, in a real
144
application you might want to do things other than print them. */
146
for (i = 0; i < rc; i++)
148
char *substring_start = subject + ovector[2*i];
149
int substring_length = ovector[2*i+1] - ovector[2*i];
150
printf("%2d: %.*s\n", i, substring_length, substring_start);
154
/**************************************************************************
155
* That concludes the basic part of this demonstration program. We have *
156
* compiled a pattern, and performed a single match. The code that follows *
157
* first shows how to access named substrings, and then how to code for *
158
* repeated matches on the same subject. *
159
**************************************************************************/
161
/* See if there are any named substrings, and if so, show them by name. First
162
we have to extract the count of named parentheses from the pattern. */
165
re, /* the compiled pattern */
166
NULL, /* no extra data - we didn't study the pattern */
167
PCRE_INFO_NAMECOUNT, /* number of named substrings */
168
&namecount); /* where to put the answer */
170
if (namecount <= 0) printf("No named substrings\n"); else
172
unsigned char *tabptr;
173
printf("Named substrings\n");
175
/* Before we can access the substrings, we must extract the table for
176
translating names to numbers, and the size of each entry in the table. */
179
re, /* the compiled pattern */
180
NULL, /* no extra data - we didn't study the pattern */
181
PCRE_INFO_NAMETABLE, /* address of the table */
182
&name_table); /* where to put the answer */
185
re, /* the compiled pattern */
186
NULL, /* no extra data - we didn't study the pattern */
187
PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
188
&name_entry_size); /* where to put the answer */
190
/* Now we can scan the table and, for each entry, print the number, the name,
191
and the substring itself. */
194
for (i = 0; i < namecount; i++)
196
int n = (tabptr[0] << 8) | tabptr[1];
197
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
198
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
199
tabptr += name_entry_size;
204
/*************************************************************************
205
* If the "-g" option was given on the command line, we want to continue *
206
* to search for additional matches in the subject string, in a similar *
207
* way to the /g option in Perl. This turns out to be trickier than you *
208
* might think because of the possibility of matching an empty string. *
209
* What happens is as follows: *
211
* If the previous match was NOT for an empty string, we can just start *
212
* the next match at the end of the previous one. *
214
* If the previous match WAS for an empty string, we can't do that, as it *
215
* would lead to an infinite loop. Instead, a special call of pcre_exec() *
216
* is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first *
217
* of these tells PCRE that an empty string is not a valid match; other *
218
* possibilities must be tried. The second flag restricts PCRE to one *
219
* match attempt at the initial string position. If this match succeeds, *
220
* an alternative to the empty string match has been found, and we can *
221
* proceed round the loop. *
222
*************************************************************************/
226
free(re); /* Release the memory used for the compiled pattern */
227
return 0; /* Finish unless -g was given */
230
/* Loop for second and subsequent matches */
234
int options = 0; /* Normally no options */
235
int start_offset = ovector[1]; /* Start at end of previous match */
237
/* If the previous match was for an empty string, we are finished if we are
238
at the end of the subject. Otherwise, arrange to run another match at the
239
same point to see if a non-empty match can be found. */
241
if (ovector[0] == ovector[1])
243
if (ovector[0] == subject_length) break;
244
options = PCRE_NOTEMPTY | PCRE_ANCHORED;
247
/* Run the next matching operation */
250
re, /* the compiled pattern */
251
NULL, /* no extra data - we didn't study the pattern */
252
subject, /* the subject string */
253
subject_length, /* the length of the subject */
254
start_offset, /* starting offset in the subject */
255
options, /* options */
256
ovector, /* output vector for substring information */
257
OVECCOUNT); /* number of elements in the output vector */
259
/* This time, a result of NOMATCH isn't an error. If the value in "options"
260
is zero, it just means we have found all possible matches, so the loop ends.
261
Otherwise, it means we have failed to find a non-empty-string match at a
262
point where there was a previous empty-string match. In this case, we do what
263
Perl does: advance the matching position by one, and continue. We do this by
264
setting the "end of previous match" offset, because that is picked up at the
265
top of the loop as the point at which to start again. */
267
if (rc == PCRE_ERROR_NOMATCH)
269
if (options == 0) break;
270
ovector[1] = start_offset + 1;
271
continue; /* Go round the loop again */
274
/* Other matching errors are not recoverable. */
278
printf("Matching error %d\n", rc);
279
free(re); /* Release memory used for the compiled pattern */
285
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
287
/* The match succeeded, but the output vector wasn't big enough. */
292
printf("ovector only has room for %d captured substrings\n", rc - 1);
295
/* As before, show substrings stored in the output vector by number, and then
296
also any named substrings. */
298
for (i = 0; i < rc; i++)
300
char *substring_start = subject + ovector[2*i];
301
int substring_length = ovector[2*i+1] - ovector[2*i];
302
printf("%2d: %.*s\n", i, substring_length, substring_start);
305
if (namecount <= 0) printf("No named substrings\n"); else
307
unsigned char *tabptr = name_table;
308
printf("Named substrings\n");
309
for (i = 0; i < namecount; i++)
311
int n = (tabptr[0] << 8) | tabptr[1];
312
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
313
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
314
tabptr += name_entry_size;
317
} /* End of loop to find second and subsequent matches */
320
free(re); /* Release memory used for the compiled pattern */
324
/* End of pcredemo.c */