20
20
#ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
21
21
#define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
25
#pragma warning(disable: 4103)
23
27
#ifdef BOOST_HAS_ABI_HEADERS
24
28
# include BOOST_ABI_PREFIX
27
34
#ifdef __BORLANDC__
28
35
# pragma option push -w-8008 -w-8066
38
# pragma warning(push)
39
# pragma warning(disable: 4800)
32
43
namespace re_detail{
34
45
template <class BidiIterator, class Allocator, class traits>
35
perl_matcher<BidiIterator, Allocator, traits>::perl_matcher(BidiIterator first, BidiIterator end,
36
match_results<BidiIterator, Allocator>& what,
37
const basic_regex<char_type, traits>& e,
40
: m_result(what), base(first), last(end),
41
position(first), backstop(b), re(e), traits_inst(e.get_traits()),
42
m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
47
template <class BidiIterator, class Allocator, class traits>
48
46
void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
50
48
typedef typename regex_iterator_traits<BidiIterator>::iterator_category category;
49
typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
60
icase = re.flags() & regex_constants::icase;
61
59
estimate_max_state_count(static_cast<category*>(0));
60
expression_flag_type re_f = re.flags();
61
icase = re_f & regex_constants::icase;
62
62
if(!(m_match_flags & (match_perl|match_posix)))
64
if((re.flags() & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
64
if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
65
65
m_match_flags |= match_perl;
66
else if((re.flags() & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
66
else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
67
67
m_match_flags |= match_perl;
69
69
m_match_flags |= match_posix;
80
80
m_backup_state = 0;
82
82
// find the value to use for matching word boundaries:
83
const char_type w = static_cast<char_type>('w');
84
m_word_mask = traits_inst.lookup_classname(&w, &w+1);
83
m_word_mask = re.get_data().m_word_mask;
85
84
// find bitmask to use for matching '.':
86
85
match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline);
89
88
template <class BidiIterator, class Allocator, class traits>
90
89
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
92
static const difference_type k = 100000;
93
difference_type dist = boost::re_detail::distance(base, last);
94
traits_size_type states = static_cast<traits_size_type>(re.size());
92
// How many states should we allow our machine to visit before giving up?
93
// This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
94
// where N is the length of the string, and S is the number of states
95
// in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
96
// but these take unreasonable amounts of time to bail out in pathological
99
// Calculate NS^2 first:
101
static const std::ptrdiff_t k = 100000;
102
std::ptrdiff_t dist = boost::re_detail::distance(base, last);
105
std::ptrdiff_t states = re.size();
96
difference_type lim = ((std::numeric_limits<difference_type>::max)() - k) / states;
98
max_state_count = (std::numeric_limits<difference_type>::max)();
100
max_state_count = k + states * dist;
109
if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
111
max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
115
if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
117
max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
122
max_state_count = states;
125
// Now calculate N^2:
128
if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
130
max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
134
if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
136
max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
141
// N^2 can be a very large number indeed, to prevent things getting out
142
// of control, cap the max states:
144
if(states > BOOST_REGEX_MAX_STATE_COUNT)
145
states = BOOST_REGEX_MAX_STATE_COUNT;
147
// If (the possibly capped) N^2 is larger than our first estimate,
150
if(states > max_state_count)
151
max_state_count = states;
103
154
template <class BidiIterator, class Allocator, class traits>
104
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
155
inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
106
157
// we don't know how long the sequence is:
107
158
max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
110
161
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
111
162
template <class BidiIterator, class Allocator, class traits>
112
bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
163
inline bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
113
164
protected_proc_type proc)
117
return (this->*proc)();
118
}__except(EXCEPTION_STACK_OVERFLOW == GetExceptionCode())
120
reset_stack_guard_page();
122
// we only get here after a stack overflow:
123
raise_error<traits>(traits_inst, regex_constants::error_size);
124
// and we never really get here at all:
127
166
::boost::re_detail::concrete_protected_call
128
167
<perl_matcher<BidiIterator, Allocator, traits> >
135
174
template <class BidiIterator, class Allocator, class traits>
136
bool perl_matcher<BidiIterator, Allocator, traits>::match()
175
inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
138
177
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
139
178
return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::match_imp);
161
200
m_match_flags |= regex_constants::match_all;
162
201
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last);
163
202
m_presult->set_base(base);
203
m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs()));
164
204
if(m_match_flags & match_posix)
165
205
m_result = *m_presult;
166
206
verify_options(re.flags(), m_match_flags);
167
207
if(0 == match_prefix())
169
return m_result[0].second == last;
209
return (m_result[0].second == last) && (m_result[0].first == base);
171
211
#if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
184
224
template <class BidiIterator, class Allocator, class traits>
185
bool perl_matcher<BidiIterator, Allocator, traits>::find()
225
inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
187
227
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
188
228
return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::find_imp);
307
348
template <class BidiIterator, class Allocator, class traits>
308
bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
310
int index = static_cast<const re_brace*>(pstate)->index;
313
if((m_match_flags & match_nosubs) == 0)
314
m_presult->set_second(position, index);
316
else if((index < 0) && (index != -4))
318
// matched forward lookahead:
322
pstate = pstate->next.p;
326
template <class BidiIterator, class Allocator, class traits>
327
349
bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
329
351
unsigned int len = static_cast<const re_literal*>(pstate)->length;
426
448
template <class BidiIterator, class Allocator, class traits>
427
bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
429
if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
431
if((m_match_flags & match_all) && (position != last))
433
if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
435
m_presult->set_second(position);
437
m_has_found_match = true;
438
if((m_match_flags & match_posix) == match_posix)
440
m_result.maybe_assign(*m_presult);
441
if((m_match_flags & match_any) == 0)
444
#ifdef BOOST_REGEX_MATCH_EXTRA
445
if(match_extra & m_match_flags)
447
for(unsigned i = 0; i < m_presult->size(); ++i)
448
if((*m_presult)[i].matched)
449
((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
455
template <class BidiIterator, class Allocator, class traits>
456
449
bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
458
451
bool b; // indicates whether next character is a word character
589
582
template <class BidiIterator, class Allocator, class traits>
590
583
bool perl_matcher<BidiIterator, Allocator, traits>::match_backref()
592
// compare with what we previously matched:
586
// Compare with what we previously matched.
587
// Note that this succeeds if the backref did not participate
588
// in the match, this is in line with ECMAScript, but not Perl
593
591
BidiIterator i = (*m_presult)[static_cast<const re_brace*>(pstate)->index].first;
594
592
BidiIterator j = (*m_presult)[static_cast<const re_brace*>(pstate)->index].second;
714
712
inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
716
714
// return true if marked sub-expression N has been matched:
717
bool result = (*m_presult)[static_cast<const re_brace*>(pstate)->index].matched;
718
pstate = pstate->next.p;
715
int index = static_cast<const re_brace*>(pstate)->index;
719
// Magic value for a (DEFINE) block:
724
// Check if index is a hash value:
726
index = re.get_data().get_id(index);
727
// Have we matched subexpression "index"?
728
result = (*m_presult)[index].matched;
729
pstate = pstate->next.p;
733
// Have we recursed into subexpression "index"?
734
// If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
737
idx = re.get_data().get_id(idx);
738
result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0));
739
pstate = pstate->next.p;