1.2.4
by Andrés Roldán
Import upstream version 3.4.0 |
1 |
/*--------------------------------------------------------------------*/
|
2 |
/*--- A simple sequence matching facility. ---*/
|
|
3 |
/*--- m_seqmatch.c ---*/
|
|
4 |
/*--------------------------------------------------------------------*/
|
|
5 |
||
6 |
/*
|
|
7 |
This file is part of Valgrind, a dynamic binary instrumentation
|
|
8 |
framework.
|
|
9 |
||
10 |
Copyright (C) 2008-2012 OpenWorks Ltd
|
|
1.1.16
by Benjamin Kerensa
Import upstream version 3.8.1 |
11 |
info@open-works.co.uk
|
1.2.4
by Andrés Roldán
Import upstream version 3.4.0 |
12 |
|
13 |
This program is free software; you can redistribute it and/or
|
|
14 |
modify it under the terms of the GNU General Public License as
|
|
15 |
published by the Free Software Foundation; either version 2 of the
|
|
16 |
License, or (at your option) any later version.
|
|
17 |
||
18 |
This program is distributed in the hope that it will be useful, but
|
|
19 |
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
21 |
General Public License for more details.
|
|
22 |
||
23 |
You should have received a copy of the GNU General Public License
|
|
24 |
along with this program; if not, write to the Free Software
|
|
25 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
26 |
02111-1307, USA.
|
|
27 |
||
28 |
The GNU General Public License is contained in the file COPYING.
|
|
29 |
*/
|
|
30 |
||
31 |
#include "pub_core_basics.h" |
|
32 |
#include "pub_core_libcassert.h" |
|
33 |
#include "pub_core_libcbase.h" // VG_(strlen) |
|
34 |
#include "pub_core_seqmatch.h" // self |
|
35 |
||
36 |
/* ---------------------------------------------------------------------
|
|
37 |
A simple sequence matching facility
|
|
38 |
------------------------------------------------------------------ */
|
|
39 |
||
40 |
/* See detailed comment in include/pub_tool_seqmatch.h about this. */
|
|
41 |
Bool VG_(generic_match) ( |
|
42 |
Bool matchAll, |
|
43 |
void* patt, SizeT szbPatt, UWord nPatt, UWord ixPatt, |
|
44 |
void* input, SizeT szbInput, UWord nInput, UWord ixInput, |
|
45 |
Bool (*pIsStar)(void*), |
|
46 |
Bool (*pIsQuery)(void*), |
|
47 |
Bool (*pattEQinp)(void*,void*,void*,UWord), |
|
1.1.16
by Benjamin Kerensa
Import upstream version 3.8.1 |
48 |
void* inputCompleter |
49 |
)
|
|
1.2.4
by Andrés Roldán
Import upstream version 3.4.0 |
50 |
{
|
51 |
/* This is the spec, written in my favourite formal specification
|
|
52 |
language. It specifies non-greedy matching of '*'s.
|
|
53 |
||
54 |
ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
|
|
55 |
ma ('*':ps) [] = ma ps []
|
|
56 |
||
57 |
ma ('?':ps) (i:is) = ma ps is
|
|
58 |
ma ('?':ps) [] = False
|
|
59 |
||
60 |
ma (p:ps) (i:is) = p == i && ma ps is
|
|
61 |
||
62 |
ma (p:ps) [] = False
|
|
63 |
ma [] (i:is) = False -- m-all, True for m-prefix
|
|
64 |
ma [] [] = True
|
|
65 |
*/
|
|
66 |
Bool havePatt, haveInput; |
|
67 |
void *currPatt, *currInput; |
|
68 |
tailcall: |
|
69 |
vg_assert(nPatt >= 0 && nPatt < 1000000); /* arbitrary */ |
|
70 |
vg_assert(nInput >= 0 && nInput < 1000000); /* arbitrary */ |
|
71 |
vg_assert(ixPatt >= 0 && ixPatt <= nPatt); |
|
72 |
vg_assert(ixInput >= 0 && ixInput <= nInput); |
|
73 |
||
74 |
havePatt = ixPatt < nPatt; |
|
75 |
haveInput = ixInput < nInput; |
|
76 |
||
77 |
/* No specific need to set NULL when !have{Patt,Input}, but guards
|
|
78 |
against inadvertantly dereferencing an out of range pointer to
|
|
79 |
the pattern or input arrays. */
|
|
80 |
currPatt = havePatt ? ((Char*)patt) + szbPatt * ixPatt : NULL; |
|
81 |
currInput = haveInput ? ((Char*)input) + szbInput * ixInput : NULL; |
|
82 |
||
83 |
// Deal with the complex case first: wildcards. Do frugal
|
|
84 |
// matching. When encountering a '*', first skip no characters
|
|
85 |
// at all, and see if the rest of the match still works. Only if
|
|
86 |
// that fails do we then skip a character, and retry at the next
|
|
87 |
// position.
|
|
88 |
//
|
|
89 |
// ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
|
|
90 |
//
|
|
91 |
// If we're out of input, check the rest of the pattern matches
|
|
92 |
// the empty input. This really means can only be be empty or
|
|
93 |
// composed entirely of '*'s.
|
|
94 |
//
|
|
95 |
// ma ('*':ps) [] = ma ps []
|
|
96 |
//
|
|
97 |
if (havePatt && pIsStar(currPatt)) { |
|
98 |
if (haveInput) { |
|
99 |
// ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
|
|
100 |
// we unavoidably have to make a real recursive call for the
|
|
101 |
// first half of the OR, since this isn't straight tail-recursion.
|
|
102 |
if (VG_(generic_match)( matchAll, |
|
103 |
patt, szbPatt, nPatt, ixPatt+1, |
|
104 |
input,szbInput,nInput, ixInput+0, |
|
105 |
pIsStar,pIsQuery,pattEQinp, |
|
1.1.16
by Benjamin Kerensa
Import upstream version 3.8.1 |
106 |
inputCompleter) ) { |
107 |
return True; |
|
1.2.4
by Andrés Roldán
Import upstream version 3.4.0 |
108 |
}
|
109 |
// but we can tail-recurse for the second call
|
|
110 |
ixInput++; goto tailcall; |
|
111 |
} else { |
|
112 |
// ma ('*':ps) [] = ma ps []
|
|
113 |
ixPatt++; goto tailcall; |
|
114 |
}
|
|
115 |
}
|
|
116 |
||
117 |
// simpler cases now. Deal with '?' wildcards.
|
|
118 |
//
|
|
119 |
// ma ('?':ps) (i:is) = ma ps is
|
|
120 |
// ma ('?':ps) [] = False
|
|
121 |
if (havePatt && pIsQuery(currPatt)) { |
|
122 |
if (haveInput) { |
|
123 |
ixPatt++; ixInput++; goto tailcall; |
|
124 |
} else { |
|
125 |
return False; |
|
126 |
}
|
|
127 |
}
|
|
128 |
||
129 |
// obvious case with literal chars in the pattern
|
|
130 |
//
|
|
131 |
// ma (p:ps) (i:is) = p == i && ma ps is
|
|
132 |
if (havePatt && haveInput) { |
|
133 |
if (!pattEQinp(currPatt,currInput,inputCompleter,ixInput)) return False; |
|
1.1.16
by Benjamin Kerensa
Import upstream version 3.8.1 |
134 |
ixPatt++; ixInput++; goto tailcall; |
1.2.4
by Andrés Roldán
Import upstream version 3.4.0 |
135 |
}
|
136 |
||
137 |
// if we run out of input before we run out of pattern, we must fail
|
|
138 |
// ma (p:ps) [] = False
|
|
139 |
if (havePatt && !haveInput) return False; |
|
140 |
||
141 |
// if we run out of pattern before we run out of input, the
|
|
142 |
// verdict depends on the matching mode. If we are trying to
|
|
143 |
// match exactly (the pattern must consume the entire input)
|
|
144 |
// then the outcome is failure. However, if we're merely attempting
|
|
145 |
// to match some prefix of the input, then we have been successful.
|
|
146 |
//
|
|
147 |
// ma [] (i:is) = False -- m-all, True for m-prefix
|
|
148 |
if (!havePatt && haveInput) { |
|
149 |
return matchAll ? False // match-all |
|
150 |
: True; // match-prefix |
|
151 |
}
|
|
152 |
||
153 |
// finally, if both sequence and input are both completely
|
|
154 |
// consumed, then we were successful, regardless of matching mode.
|
|
155 |
if (!havePatt && !haveInput) return True; |
|
156 |
||
157 |
// end of cases
|
|
158 |
vg_assert(0); |
|
159 |
}
|
|
160 |
||
161 |
||
162 |
/* And a parameterization of the above, to make it do
|
|
163 |
string matching.
|
|
164 |
*/
|
|
165 |
static Bool charIsStar ( void* pV ) { return *(Char*)pV == '*'; } |
|
166 |
static Bool charIsQuery ( void* pV ) { return *(Char*)pV == '?'; } |
|
167 |
static Bool char_p_EQ_i ( void* pV, void* cV, |
|
1.1.16
by Benjamin Kerensa
Import upstream version 3.8.1 |
168 |
void* null_completer, UWord ixcV ) { |
169 |
Char p = *(Char*)pV; |
|
1.2.4
by Andrés Roldán
Import upstream version 3.4.0 |
170 |
Char c = *(Char*)cV; |
171 |
vg_assert(p != '*' && p != '?'); |
|
172 |
return p == c; |
|
173 |
}
|
|
174 |
Bool VG_(string_match) ( const Char* patt, const Char* input ) |
|
175 |
{
|
|
176 |
return VG_(generic_match)( |
|
177 |
True/* match-all */, |
|
178 |
(void*)patt, sizeof(UChar), VG_(strlen)(patt), 0, |
|
179 |
(void*)input, sizeof(UChar), VG_(strlen)(input), 0, |
|
180 |
charIsStar, charIsQuery, char_p_EQ_i, |
|
1.1.16
by Benjamin Kerensa
Import upstream version 3.8.1 |
181 |
NULL
|
182 |
);
|
|
1.2.4
by Andrés Roldán
Import upstream version 3.4.0 |
183 |
}
|
184 |
||
185 |
||
186 |
// test cases for the matcher (in match-all mode)
|
|
187 |
// typedef struct { char* patt; char* input; Bool xres; } Test;
|
|
188 |
//
|
|
189 |
//static Test tests[] =
|
|
190 |
// {
|
|
191 |
// { "" ,"" , True },
|
|
192 |
// { "a" ,"" , False },
|
|
193 |
// { "a" ,"b" , False },
|
|
194 |
// { "a" ,"a" , True },
|
|
195 |
// { "a" ,"aa" , False },
|
|
196 |
// { "*" ,"" , True },
|
|
197 |
// { "**" ,"" , True },
|
|
198 |
// { "*" ,"abc", True },
|
|
199 |
// { "*a" ,"abc", False },
|
|
200 |
// { "*b" ,"abc", False },
|
|
201 |
// { "*bc" ,"abc", True },
|
|
202 |
// { "a*b" ,"abc", False },
|
|
203 |
// { "a*c" ,"abc", True },
|
|
204 |
// { "*c" ,"abc", True },
|
|
205 |
// { "c*c" ,"abc", False },
|
|
206 |
// { "abc*" ,"abc", True },
|
|
207 |
// { "abc**" ,"abc", True },
|
|
208 |
// { "**abc" ,"abc", True },
|
|
209 |
// { "**a*b*c**" ,"abc", True },
|
|
210 |
// { "**a*b*d**" ,"abc", False },
|
|
211 |
// { "a?b" ,"abc", False },
|
|
212 |
// { "a?c" ,"abc", True },
|
|
213 |
// { "?" ,"" , False },
|
|
214 |
// { "?" ,"a" , True },
|
|
215 |
// { "?" ,"ab" , False },
|
|
216 |
// { "abcd" ,"abc", False },
|
|
217 |
// { "ab" ,"abc", False },
|
|
218 |
// { NULL ,NULL , False }
|
|
219 |
// };
|
|
220 |
//
|
|
221 |
//int main ( void )
|
|
222 |
//{
|
|
223 |
// Test* t;
|
|
224 |
// for (t = tests; t->patt; t++) {
|
|
225 |
// printf("%10s %6s %s\n",
|
|
226 |
// t->patt, t->input,
|
|
227 |
// match_string_all((UChar*)t->patt,(UChar*)t->input,True)
|
|
228 |
// == t->xres
|
|
229 |
// ? "pass" : "FAIL" );
|
|
230 |
// }
|
|
231 |
// return 0;
|
|
232 |
//}
|
|
233 |
||
234 |
/*--------------------------------------------------------------------*/
|
|
235 |
/*--- end m_seqmatch.c ---*/
|
|
236 |
/*--------------------------------------------------------------------*/
|
|
237 |