/**
 * This file contains the implementation of a lightweight wrapper around
 * libc regex, providing a regular expression match and replace facility.
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 * @remark Idea comes from TextFilt project <http://textfilt.sourceforge.net>
 *
 * @author Philippe Elie
 */
18
#include "string_manip.h"
26
/**
 * Translate an error code from regcomp()/regexec() into a readable message.
 *
 * @param err     error code returned by the failing regex call
 * @param regexp  the regex_t the failing call operated on
 * @return the text produced by regerror() for @p err
 */
string op_regerror(int err, regex_t const & regexp)
{
	// Called with a zero-sized buffer, regerror() only reports how many
	// bytes the message needs, terminating NUL included.
	size_t needed_size = regerror(err, &regexp, 0, 0);
	if (needed_size == 0)
		return string();

	// A string owns the scratch storage, so it is released on every
	// exit path (a raw new[] buffer would need an explicit delete[]).
	string buffer(needed_size, '\0');
	regerror(err, &regexp, &buffer[0], needed_size);

	// Copy up to the NUL written by regerror(), dropping any padding.
	return string(buffer.c_str());
}
36
void op_regcomp(regex_t & regexp, string const & pattern)
38
int err = regcomp(®exp, pattern.c_str(), REG_EXTENDED);
40
throw bad_regex("regcomp error: " + op_regerror(err, regexp)
41
+ " for pattern : " + pattern);
46
/**
 * Run a compiled expression against @p str.
 *
 * @param regex   compiled expression
 * @param str     subject string
 * @param match   array receiving the sub-expression offsets
 * @param nmatch  number of entries available in @p match
 * @return true unless regexec() reports REG_NOMATCH
 */
bool op_regexec(regex_t const & regex, string const & str, regmatch_t * match,
		size_t nmatch)
{
	return regexec(&regex, str.c_str(), nmatch, match, 0) != REG_NOMATCH;
}
53
/**
 * Release the resources held by a compiled expression.
 *
 * NOTE(review): only the signature was visible; the body is assumed to be
 * a plain regfree() call, matching how the destructor uses this helper.
 *
 * @param regexp  expression previously compiled with regcomp()
 */
void op_regfree(regex_t & regexp)
{
	regfree(&regexp);
}
59
// Return the sub-expression index designated by the character x seen in a
// "\x" reference of a replacement string.
// '0'..'9' map to 0..9 and 'a'..'z' map to 10..35; any other character
// yields size_t(-1).
size_t subexpr_index(char ch)
{
	// Explicit range tests keep the behaviour well defined for every
	// char value, including negative ones on signed-char platforms.
	if (ch >= '0' && ch <= '9')
		return size_t(ch - '0');
	if (ch >= 'a' && ch <= 'z')
		return size_t(ch - 'a') + 10;
	return size_t(-1);
}
71
} // anonymous namespace
74
/**
 * Build the exception, forwarding the given text to op_exception.
 *
 * @param pattern  text handed to the op_exception base class
 */
bad_regex::bad_regex(string const & pattern)
	: op_exception(pattern)
{
}
80
/**
 * @param limit_      bound on the number of passes made by execute() and
 *                    on the number of replacements per rule in do_execute()
 * @param limit_defs  bound on the number of definition-expansion rounds
 *                    performed by expand_string()
 *
 * NOTE(review): the second parameter's declaration was not visible; its
 * type is assumed to be size_t like the first — confirm against the header.
 */
regular_expression_replace::regular_expression_replace(size_t limit_,
						       size_t limit_defs)
	:
	limit(limit_),
	limit_defs_expansion(limit_defs)
{
}
89
/**
 * Free every compiled expression stored in regex_replace.
 */
regular_expression_replace::~regular_expression_replace()
{
	for (size_t i = 0 ; i < regex_replace.size() ; ++i)
		op_regfree(regex_replace[i].regexp);
}
96
void regular_expression_replace::add_definition(string const & name,
97
string const & definition)
99
defs[name] = expand_string(definition);
103
void regular_expression_replace::add_pattern(string const & pattern,
104
string const & replace)
106
string expanded_pattern = expand_string(pattern);
109
op_regcomp(regexp, expanded_pattern);
110
replace_t regex = { regexp, replace };
111
regex_replace.push_back(regex);
115
string regular_expression_replace::expand_string(string const & input)
117
string last, expanded(input);
119
for (i = 0 ; i < limit_defs_expansion ; ++i) {
121
expanded = substitute_definition(last);
122
if (expanded == last)
126
if (i == limit_defs_expansion)
127
throw bad_regex("too many substitution for: + input");
133
string regular_expression_replace::substitute_definition(string const & pattern)
136
bool previous_is_escape = false;
138
for (size_t i = 0 ; i < pattern.length() ; ++i) {
139
if (pattern[i] == '$' && !previous_is_escape) {
140
size_t pos = pattern.find('{', i);
142
throw bad_regex("invalid $ in pattern: " + pattern);
144
size_t end = pattern.find('}', i);
145
if (end == string::npos) {
146
throw bad_regex("no matching '}' in pattern: " + pattern);
148
string def_name = pattern.substr(pos+1, (end-pos) - 1);
149
if (defs.find(def_name) == defs.end()) {
150
throw bad_regex("definition not found and used in pattern: (" + def_name + ") " + pattern);
152
result += defs[def_name];
155
if (pattern[i] == '\\' && !previous_is_escape)
156
previous_is_escape = true;
158
previous_is_escape = false;
159
result += pattern[i];
167
// FIXME limit output string size ? (cause we can have exponential growing
168
// of output string through a rule "a" = "aa")
169
bool regular_expression_replace::execute(string & str) const
172
for (size_t nr_iter = 0; changed && nr_iter < limit ; ++nr_iter) {
174
for (size_t i = 0 ; i < regex_replace.size() ; ++i) {
175
if (do_execute(str, regex_replace[i]))
180
// this don't return if the input string has been changed but if
181
// we reach the limit number of iteration.
182
return changed == false;
186
bool regular_expression_replace::do_execute(string & str,
187
replace_t const & regexp) const
189
bool changed = false;
191
regmatch_t match[max_match];
192
for (size_t iter = 0;
193
op_regexec(regexp.regexp, str, match, max_match) && iter < limit;
196
do_replace(str, regexp.replace, match);
204
regular_expression_replace::get_match(regmatch_t const * match, char idx) const
206
size_t sub_expr = subexpr_index(idx);
207
if (sub_expr == size_t(-1))
208
throw bad_regex("expect group index: " + idx);
209
if (sub_expr >= max_match)
210
throw bad_regex("illegal group index :" + idx);
211
return match[sub_expr];
214
void regular_expression_replace::do_replace
215
(string & str, string const & replace, regmatch_t const * match) const
218
for (size_t i = 0 ; i < replace.length() ; ++i) {
219
if (replace[i] == '\\') {
220
if (i == replace.length() - 1) {
221
throw bad_regex("illegal \\ trailer: " +
225
if (replace[i] == '\\') {
228
regmatch_t const & matched = get_match(match,
230
if (matched.rm_so == -1 &&
231
matched.rm_eo == -1) {
232
// empty match: nothing todo
233
} else if (matched.rm_so == -1 ||
234
matched.rm_eo == -1) {
235
throw bad_regex("illegal match: " +
238
inserted += str.substr(matched.rm_so,
239
matched.rm_eo - matched.rm_so);
243
inserted += replace[i];
247
size_t first = match[0].rm_so;
248
size_t count = match[0].rm_eo - match[0].rm_so;
250
str.replace(first, count, inserted);
254
void setup_regex(regular_expression_replace & regex,
255
string const & filename)
257
ifstream in(filename.c_str());
259
throw op_runtime_error("Can't open file " + filename +
260
" for reading", errno);
263
regular_expression_replace var_name_rule;
264
var_name_rule.add_pattern("^\\$([_a-zA-Z][_a-zA-Z0-9]*)[ ]*=.*", "\\1");
265
regular_expression_replace var_value_rule;
266
var_value_rule.add_pattern(".*=[ ]*\"(.*)\"", "\\1");
268
regular_expression_replace left_rule;
269
left_rule.add_pattern("[ ]*\"(.*)\"[ ]*=.*", "\\1");
270
regular_expression_replace right_rule;
271
right_rule.add_pattern(".*=[ ]*\"(.*)\"", "\\1");
274
while (getline(in, line)) {
276
if (line.empty() || line[0] == '#')
280
var_name_rule.execute(temp);
283
left_rule.execute(left);
285
throw bad_regex("invalid input file: " +
290
right_rule.execute(right);
292
throw bad_regex("invalid input file: "
296
regex.add_pattern(left, right);
298
// temp != line ==> var_name_rule succeed to substitute
299
// into temp the var_name present in line
300
string var_name = temp;
301
string var_value = line;
302
var_value_rule.execute(var_value);
303
if (var_value == line) {
304
throw bad_regex("invalid input file: " +
308
regex.add_definition(var_name, var_value);