1
//===-- Regex.cpp - Regular Expression matcher implementation -------------===//
3
// The LLVM Compiler Infrastructure
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
8
//===----------------------------------------------------------------------===//
10
// This file implements a POSIX regular expression matcher.
12
//===----------------------------------------------------------------------===//
14
#include "llvm/Support/Regex.h"
15
#include "llvm/Support/ErrorHandling.h"
16
#include "llvm/Support/raw_ostream.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "regex_impl.h"
22
/// Regex - Compile the pattern text referenced by \p regex and record the
/// compile status in `error`.
/// \param regex  pattern text; it is handed to llvm_regcomp as a
///               (data(), end()) range.
/// \param Flags  bitmask of Regex options; the visible code tests IgnoreCase.
/// NOTE(review): `preg` is allocated with a raw `new` here; its release is not
/// visible in this excerpt -- confirm the destructor frees it.
Regex::Regex(const StringRef &regex, unsigned Flags) {
24
preg = new llvm_regex();
// The end of the pattern is published through re_endp; together with the
// REG_PEND flag passed below this lets the pattern be delimited by a pointer
// (BSD regex(3) extension) instead of a terminating NUL.
25
preg->re_endp = regex.end();
26
if (Flags & IgnoreCase)
// Always compile as an extended regex (REG_EXTENDED); the result code is kept
// in `error` for later inspection.
30
error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
38
/// isValid - Query the compile status stored in `error`. The visible code
/// formats the message associated with `error` into \p Error; the return
/// statements are not visible in this excerpt.
/// \param Error  string whose storage receives the text written by
///               llvm_regerror.
bool Regex::isValid(std::string &Error) {
// Sizing call: a NULL buffer with size 0 is passed so that the return value
// can be used as the required buffer length (POSIX regerror convention).
42
size_t len = llvm_regerror(error, preg, NULL, 0);
// NOTE(review): this writes up to `len` bytes through &Error[0], so Error must
// already hold at least `len` characters at this point -- the resize is not
// visible in this excerpt; confirm it precedes this call.
45
llvm_regerror(error, preg, &Error[0], len);
49
/// getNumMatches - In a valid regex, return the number of parenthesized
50
/// matches it contains.
/// NOTE(review): only the signature is visible in this excerpt. match() sizes
/// its result array as preg->re_nsub+1, so this presumably reports
/// preg->re_nsub -- confirm against the body.
51
unsigned Regex::getNumMatches() const {
55
/// match - Run the compiled regex over \p String.
/// \param String   text to search; it is passed to llvm_regexec as a
///                 (data(), size()) range.
/// \param Matches  optional output. When non-null, one StringRef per slot is
///                 appended: slot 0 for the overall match and one per
///                 parenthesized group (re_nsub+1 slots in total). A group
///                 that did not participate yields an empty StringRef().
bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
// No positions are requested from regexec unless the caller wants them.
56
unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
58
// pmatch needs to have at least one element.
59
SmallVector<llvm_regmatch_t, 8> pm;
60
pm.resize(nmatch > 0 ? nmatch : 1);
// With REG_STARTEND the range to search is taken from pm[0] (BSD regex(3)
// extension); the end offset is the length of String.
62
pm[0].rm_eo = String.size();
64
int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
66
if (rc == REG_NOMATCH)
69
// regexec can fail due to invalid pattern or running out of memory.
76
if (Matches) { // match position requested
79
for (unsigned i = 0; i != nmatch; ++i) {
80
if (pm[i].rm_so == -1) {
81
// this group didn't match
82
Matches->push_back(StringRef());
// A slot may legitimately cover zero characters (e.g. "a*" or an empty
// group), in which case rm_so == rm_eo; only an inverted range is a bug.
85
assert(pm[i].rm_eo >= pm[i].rm_so);
86
Matches->push_back(StringRef(String.data()+pm[i].rm_so,
87
pm[i].rm_eo-pm[i].rm_so));
94
/// sub - Build a string from \p String in which the text matched by this regex
/// is replaced by \p Repl. The result is assembled as: the part of String
/// before the match, the expanded replacement, then the part after the match.
/// Backslash escapes in Repl are interpreted; decimal escapes are
/// backreferences into the match groups.
/// `Error` (used below; its declaration is on a signature line not visible in
/// this excerpt) may be null. When non-null it is cleared on entry and
/// receives only the first diagnostic, since every assignment is guarded by
/// Error->empty().
std::string Regex::sub(StringRef Repl, StringRef String,
96
SmallVector<StringRef, 8> Matches;
98
// Reset error, if given.
99
if (Error && !Error->empty()) *Error = "";
101
// Return the input if there was no match.
102
if (!match(String, &Matches))
105
// Otherwise splice in the replacement string, starting with the prefix before
// the match (Matches[0] is the overall match).
107
std::string Res(String.begin(), Matches[0].begin());
109
// Then the replacement string, honoring possible substitutions.
110
while (!Repl.empty()) {
111
// Skip to the next escape.
112
std::pair<StringRef, StringRef> Split = Repl.split('\\');
114
// Add the skipped substring.
117
// Check for termination and trailing backslash.
// Split.second is empty both when no backslash remains and when the backslash
// is the last character; in the latter case Split.first is shorter than Repl,
// which is what the size comparison below detects.
118
if (Split.second.empty()) {
119
if (Repl.size() != Split.first.size() &&
120
Error && Error->empty())
121
*Error = "replacement string contained trailing backslash";
125
// Otherwise update the replacement string and interpret escapes.
128
// FIXME: We should have a StringExtras function for mapping C99 escapes.
130
// Treat all unrecognized characters as self-quoting.
133
Repl = Repl.substr(1);
136
// Single character escapes.
139
Repl = Repl.substr(1);
143
Repl = Repl.substr(1);
146
// Decimal escapes are backreferences.
147
case '0': case '1': case '2': case '3': case '4':
148
case '5': case '6': case '7': case '8': case '9': {
149
// Extract the backreference number.
// Ref is the longest leading run of decimal digits; Repl then skips past it.
150
StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
151
Repl = Repl.substr(Ref.size());
// getAsInteger reports failure by returning true, hence the negation. The
// index must also name an existing entry of Matches.
154
if (!Ref.getAsInteger(10, RefValue) &&
155
RefValue < Matches.size())
156
Res += Matches[RefValue];
157
else if (Error && Error->empty())
158
*Error = "invalid backreference string '" + Ref.str() + "'";
164
// And finally the suffix.
// The suffix is everything in String after the end of the overall match.
165
Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());