/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/*
This is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. See
the file Tech.Notes for some information on the internals.

Written by: Philip Hazel <ph10@cam.ac.uk>

           Copyright (c) 1997-2003 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

/* This module contains some convenience functions for extracting substrings
from the subject string after a regex match has succeeded. The original idea
for these functions came from Scott Wimer. */


/* Include the internals header, which itself includes Standard C headers plus
the external pcre header. */

#include "internal.h"

54
/*************************************************
55
* Find number for named string *
56
*************************************************/
58
/* This function is used by the two extraction functions below, as well
59
as being generally available.
62
code the compiled regex
63
stringname the name whose number is required
65
Returns: the number of the named parentheses, or a negative number
66
(PCRE_ERROR_NOSUBSTRING) if not found
70
pcre_get_stringnumber(const pcre *code, const char *stringname)
77
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
79
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
81
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
83
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
89
int mid = (top + bot) / 2;
90
uschar *entry = nametable + entrysize*mid;
91
int c = strcmp(stringname, (char *)(entry + 2));
92
if (c == 0) return (entry[0] << 8) + entry[1];
93
if (c > 0) bot = mid + 1; else top = mid;
96
return PCRE_ERROR_NOSUBSTRING;
101
/*************************************************
102
* Copy captured string to given buffer *
103
*************************************************/
105
/* This function copies a single captured substring into a given buffer.
106
Note that we use memcpy() rather than strncpy() in case there are binary zeros
110
subject the subject string that was matched
111
ovector pointer to the offsets table
112
stringcount the number of substrings that were captured
113
(i.e. the yield of the pcre_exec call, unless
114
that was zero, in which case it should be 1/3
115
of the offset table size)
116
stringnumber the number of the required substring
117
buffer where to put the substring
118
size the size of the buffer
120
Returns: if successful:
121
the length of the copied string, not including the zero
122
that is put on the end; can be zero
124
PCRE_ERROR_NOMEMORY (-6) buffer too small
125
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
129
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
130
int stringnumber, char *buffer, int size)
133
if (stringnumber < 0 || stringnumber >= stringcount)
134
return PCRE_ERROR_NOSUBSTRING;
136
yield = ovector[stringnumber+1] - ovector[stringnumber];
137
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
138
memcpy(buffer, subject + ovector[stringnumber], yield);
145
/*************************************************
146
* Copy named captured string to given buffer *
147
*************************************************/
149
/* This function copies a single captured substring into a given buffer,
150
identifying it by name.
153
code the compiled regex
154
subject the subject string that was matched
155
ovector pointer to the offsets table
156
stringcount the number of substrings that were captured
157
(i.e. the yield of the pcre_exec call, unless
158
that was zero, in which case it should be 1/3
159
of the offset table size)
160
stringname the name of the required substring
161
buffer where to put the substring
162
size the size of the buffer
164
Returns: if successful:
165
the length of the copied string, not including the zero
166
that is put on the end; can be zero
168
PCRE_ERROR_NOMEMORY (-6) buffer too small
169
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
173
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
174
int stringcount, const char *stringname, char *buffer, int size)
176
int n = pcre_get_stringnumber(code, stringname);
177
if (n <= 0) return n;
178
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
183
/*************************************************
184
* Copy all captured strings to new store *
185
*************************************************/
187
/* This function gets one chunk of store and builds a list of pointers and all
188
of the captured substrings in it. A NULL pointer is put on the end of the list.
191
subject the subject string that was matched
192
ovector pointer to the offsets table
193
stringcount the number of substrings that were captured
194
(i.e. the yield of the pcre_exec call, unless
195
that was zero, in which case it should be 1/3
196
of the offset table size)
197
listptr set to point to the list of pointers
199
Returns: if successful: 0
201
PCRE_ERROR_NOMEMORY (-6) failed to get store
205
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
206
const char ***listptr)
209
int size = sizeof(char *);
210
int double_count = stringcount * 2;
214
for (i = 0; i < double_count; i += 2)
215
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
217
stringlist = (char **)(pcre_malloc)(size);
218
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
220
*listptr = (const char **)stringlist;
221
p = (char *)(stringlist + stringcount + 1);
223
for (i = 0; i < double_count; i += 2)
225
int len = ovector[i+1] - ovector[i];
226
memcpy(p, subject + ovector[i], len);
238
/*************************************************
239
* Free store obtained by get_substring_list *
240
*************************************************/
242
/* This function exists for the benefit of people calling PCRE from non-C
243
programs that can call its functions, but not free() or (pcre_free)() directly.
245
Argument: the result of a previous pcre_get_substring_list()
250
pcre_free_substring_list(const char **pointer)
252
(pcre_free)((void *)pointer);
257
/*************************************************
258
* Copy captured string to new store *
259
*************************************************/
261
/* This function copies a single captured substring into a piece of new
265
subject the subject string that was matched
266
ovector pointer to the offsets table
267
stringcount the number of substrings that were captured
268
(i.e. the yield of the pcre_exec call, unless
269
that was zero, in which case it should be 1/3
270
of the offset table size)
271
stringnumber the number of the required substring
272
stringptr where to put a pointer to the substring
274
Returns: if successful:
275
the length of the string, not including the zero that
276
is put on the end; can be zero
278
PCRE_ERROR_NOMEMORY (-6) failed to get store
279
PCRE_ERROR_NOSUBSTRING (-7) substring not present
283
pcre_get_substring(const char *subject, int *ovector, int stringcount,
284
int stringnumber, const char **stringptr)
288
if (stringnumber < 0 || stringnumber >= stringcount)
289
return PCRE_ERROR_NOSUBSTRING;
291
yield = ovector[stringnumber+1] - ovector[stringnumber];
292
substring = (char *)(pcre_malloc)(yield + 1);
293
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
294
memcpy(substring, subject + ovector[stringnumber], yield);
295
substring[yield] = 0;
296
*stringptr = substring;
302
/*************************************************
303
* Copy named captured string to new store *
304
*************************************************/
306
/* This function copies a single captured substring, identified by name, into
310
code the compiled regex
311
subject the subject string that was matched
312
ovector pointer to the offsets table
313
stringcount the number of substrings that were captured
314
(i.e. the yield of the pcre_exec call, unless
315
that was zero, in which case it should be 1/3
316
of the offset table size)
317
stringname the name of the required substring
318
stringptr where to put the pointer
320
Returns: if successful:
321
the length of the copied string, not including the zero
322
that is put on the end; can be zero
324
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
325
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
329
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
330
int stringcount, const char *stringname, const char **stringptr)
332
int n = pcre_get_stringnumber(code, stringname);
333
if (n <= 0) return n;
334
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
340
/*************************************************
341
* Free store obtained by get_substring *
342
*************************************************/
344
/* This function exists for the benefit of people calling PCRE from non-C
345
programs that can call its functions, but not free() or (pcre_free)() directly.
347
Argument: the result of a previous pcre_get_substring()
352
pcre_free_substring(const char *pointer)
354
(pcre_free)((void *)pointer);