3
* Copyright (c) 1998-2002
6
* Permission to use, copy, modify, distribute and sell this software
7
* and its documentation for any purpose is hereby granted without fee,
8
* provided that the above copyright notice appear in all copies and
9
* that both that copyright notice and this permission notice appear
10
* in supporting documentation. Dr John Maddock makes no representations
11
* about the suitability of this software for any purpose.
12
* It is provided "as is" without express or implied warranty.
17
* LOCATION: see http://www.boost.org for most recent version.
18
* FILE regex_compile.hpp
19
* VERSION see <boost/version.hpp>
20
* DESCRIPTION: Declares reg_expression<> member functions. This is
21
* an internal header file, do not include directly.
24
#ifndef BOOST_REGEX_COMPILE_HPP
25
#define BOOST_REGEX_COMPILE_HPP
29
#pragma option push -a8 -b -Vx -Ve -pc -w-8004
34
template <class traits>
37
typedef typename traits::char_type char_type;
40
kmp_translator(bool c, traits* p) : icase(c), pt(p) {}
41
char_type operator()(char_type c)
43
return pt->translate(c, icase);
48
template <class charT, class traits_type, class Allocator>
49
bool BOOST_REGEX_CALL re_maybe_set_member(charT c,
50
const re_set_long* set_,
51
const reg_expression<charT, traits_type, Allocator>& e)
53
const charT* p = reinterpret_cast<const charT*>(set_+1);
54
bool icase = e.flags() & regbase::icase;
55
charT col = e.get_traits().translate(c, icase);
56
for(unsigned int i = 0; i < set_->csingles; ++i)
59
return set_->isnot ? false : true;
64
return set_->isnot ? true : false;
67
} // namespace re_detail
70
// Wide-character overload of can_start() (selected by the _wide_type tag):
// tests the _map entry selected by c against mask.
//   c     - the character being examined
//   _map  - lookup table indexed by the character converted to traits_uchar_type
//   mask  - the bit(s) of the table entry to test
template <class charT, class traits, class Allocator>
71
inline bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::can_start(charT c, const unsigned char* _map, unsigned char mask, const re_detail::_wide_type&)
73
// Characters whose unsigned value is 256 or more are singled out before the
// table lookup.
// NOTE(review): the statement guarded by this test is missing from the file
// at this point (presumably an early return) - restore it from upstream.
if((traits_size_type)(traits_uchar_type)c >= 256)
75
// Otherwise the answer comes from the table entry for c, filtered by mask.
return BOOST_REGEX_MAKE_BOOL(_map[(traits_uchar_type)c] & mask);
78
template <class charT, class traits, class Allocator>
79
inline bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::can_start(charT c, const unsigned char* _map, unsigned char mask, const re_detail::_narrow_type&)
81
return BOOST_REGEX_MAKE_BOOL(_map[(traits_uchar_type)c] & mask);
84
template <class charT, class traits, class Allocator>
85
reg_expression<charT, traits, Allocator>::reg_expression(const Allocator& a)
86
: regbase(), data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
90
template <class charT, class traits, class Allocator>
91
reg_expression<charT, traits, Allocator>::reg_expression(const charT* p, flag_type f, const Allocator& a)
92
: data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
94
set_expression(p, f | regbase::use_except);
97
template <class charT, class traits, class Allocator>
98
reg_expression<charT, traits, Allocator>::reg_expression(const charT* p1, const charT* p2, flag_type f, const Allocator& a)
99
: data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
101
set_expression(p1, p2, f | regbase::use_except);
104
template <class charT, class traits, class Allocator>
105
reg_expression<charT, traits, Allocator>::reg_expression(const charT* p, size_type len, flag_type f, const Allocator& a)
106
: data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
108
set_expression(p, p + len, f | regbase::use_except);
111
template <class charT, class traits, class Allocator>
112
reg_expression<charT, traits, Allocator>::reg_expression(const reg_expression<charT, traits, Allocator>& e)
113
: regbase(e), data(e.allocator()), pkmp(0), error_code_(REG_EMPTY), _expression(0)
116
// we do a deep copy only if e is a valid expression, otherwise fail.
118
if(e.error_code() == 0)
120
const charT* pe = e.expression();
121
set_expression(pe, pe + e._expression_len, e.flags() | regbase::use_except);
125
_flags = e.flags() & ~(regbase::use_except);
126
fail(e.error_code());
130
// Destructor: hands pkmp back through re_detail::kmp_free(), using the
// allocator obtained from the data buffer.
// NOTE(review): one line between the signature and the call below is missing
// from the file (possibly a guard on pkmp) - confirm against upstream.
template <class charT, class traits, class Allocator>
131
reg_expression<charT, traits, Allocator>::~reg_expression()
134
re_detail::kmp_free(pkmp, data.allocator());
137
// Copy assignment.
// Self-assignment returns immediately. Otherwise e's error code is recorded
// with fail(), and if that leaves this object error-free the pattern text of
// e is recompiled with e's flags plus regbase::use_except.
// NOTE(review): at least one statement between the self-assignment check and
// fail(), and the tail of the function (presumably the final return), are
// missing from the file - restore them from upstream.
template <class charT, class traits, class Allocator>
138
reg_expression<charT, traits, Allocator>& BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::operator=(const reg_expression<charT, traits, Allocator>& e)
141
// we do a deep copy only if e is a valid expression, otherwise fail.
143
// assigning an object to itself is a no-op
if(this == &e) return *this;
145
// adopt e's error state first ...
fail(e.error_code());
146
// ... and only recompile when that state is "no error"
if(error_code() == 0)
147
set_expression(e._expression, e._expression + e._expression_len, e.flags() | regbase::use_except);
151
template <class charT, class traits, class Allocator>
152
inline bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::operator==(const reg_expression<charT, traits, Allocator>& e)const
154
return (_flags == e.flags())
155
&& (_expression_len == e._expression_len)
156
&& (std::memcmp(_expression, e._expression, _expression_len * sizeof(charT)) == 0);
159
template <class charT, class traits, class Allocator>
160
bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::operator<(const reg_expression<charT, traits, Allocator>& e)const
163
// we can't offer a diffinitive ordering, but we can be consistant:
164
if(_flags != e.flags()) return _flags < e.flags();
165
if(_expression_len != e._expression_len) return _expression_len < e._expression_len;
166
return std::memcmp(expression(), e.expression(), _expression_len);
169
template <class charT, class traits, class Allocator>
170
Allocator BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::allocator()const
172
return data.allocator();
175
template <class charT, class traits, class Allocator>
176
Allocator BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::get_allocator()const
178
return data.allocator();
181
template <class charT, class traits, class Allocator>
182
unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::parse_inner_set(const charT*& first, const charT* last)
185
// we have an inner [...] construct
187
jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_open_set);
188
const charT* base = first;
189
while( (first != last)
190
&& (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) != traits_type::syntax_close_set) )
197
if(*(base+1) != *(first-2))
199
unsigned int result = traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+1));
200
if((result == traits_type::syntax_colon) && ((first-base) == 5))
202
return traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+2));
204
return ((result == traits_type::syntax_colon) || (result == traits_type::syntax_dot) || (result == traits_type::syntax_equal)) ? result : 0;
208
// Scans [first, last) while the current character belongs to
// traits_type::char_class_space.
//   first - in/out cursor, taken by reference
//   last  - end of the input
// Returns true when the cursor has reached last, false otherwise.
// NOTE(review): the body of the while loop is missing from the file at this
// point (presumably it advances first) - restore it from upstream.
template <class charT, class traits, class Allocator>
209
bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::skip_space(const charT*& first, const charT* last)
212
// returns true if we get to last:
214
// keep going while there is input left and the current character is a space
while((first != last) && (traits_inst.is_class(*first, traits_type::char_class_space) == true))
218
return first == last;
221
template <class charT, class traits, class Allocator>
222
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::parse_range(const charT*& ptr, const charT* end, unsigned& min, unsigned& max)
225
// we have {x} or {x,} or {x,y} NB no spaces inside braces
226
// anything else is illegal
227
// On input ptr points to "{"
230
if(skip_space(ptr, end))
235
if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_digit)
240
min = traits_inst.toi(ptr, end, 10);
241
if(skip_space(ptr, end))
246
if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_comma)
248
//we have a second interval:
250
if(skip_space(ptr, end))
255
if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_digit)
256
max = traits_inst.toi(ptr, end, 10);
264
if(skip_space(ptr, end))
274
if(_flags & bk_braces)
276
if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_slash)
283
// back\ is OK now check the }
285
if((ptr == end) || (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_close_brace))
292
else if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_close_brace)
299
template <class charT, class traits, class Allocator>
300
charT BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::parse_escape(const charT*& first, const charT* last)
303
traits_size_type c_unsigned = (traits_size_type)(traits_uchar_type)*first;
304
// this is only used for the switch(), but cannot be folded in
305
// due to a bug in Comeau 4.2.44beta3
306
traits_size_type syntax = traits_inst.syntax_type(c_unsigned);
309
case traits_type::syntax_a:
313
case traits_type::syntax_f:
317
case traits_type::syntax_n:
321
case traits_type::syntax_r:
325
case traits_type::syntax_t:
329
case traits_type::syntax_v:
333
case traits_type::syntax_x:
340
// maybe have \x{ddd}
341
if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)(*first)) == traits_type::syntax_open_brace)
349
if(traits_inst.is_class(*first, traits_type::char_class_xdigit) == false)
354
c = (charT)traits_inst.toi(first, last, -16);
355
if((first == last) || (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)(*first)) != traits_type::syntax_close_brace))
364
if(traits_inst.is_class(*first, traits_type::char_class_xdigit) == false)
369
c = (charT)traits_inst.toi(first, last, -16);
372
case traits_type::syntax_c:
379
if(((traits_uchar_type)(*first) < (traits_uchar_type)'@')
380
|| ((traits_uchar_type)(*first) > (traits_uchar_type)127) )
385
c = (charT)((traits_uchar_type)(*first) - (traits_uchar_type)'@');
388
case traits_type::syntax_e:
392
case traits_type::syntax_digit:
393
c = (charT)traits_inst.toi(first, last, -8);
402
template <class charT, class traits, class Allocator>
403
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_maps()
405
re_detail::re_syntax_base* record = static_cast<re_detail::re_syntax_base*>(data.data());
406
// always compile the first _map:
407
std::memset(startmap, 0, 256);
408
record->can_be_null = 0;
409
compile_map(record, startmap, 0, re_detail::mask_all);
411
while(record->type != re_detail::syntax_element_match)
413
if((record->type == re_detail::syntax_element_alt) || (record->type == re_detail::syntax_element_rep))
415
std::memset(&(static_cast<re_detail::re_jump*>(record)->_map), 0, 256);
416
record->can_be_null = 0;
417
compile_map(record->next.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_take, static_cast<re_detail::re_jump*>(record)->alt.p);
418
compile_map(static_cast<re_detail::re_jump*>(record)->alt.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_skip);
419
if(record->type == re_detail::syntax_element_rep)
421
re_detail::re_repeat* rep = static_cast<re_detail::re_repeat*>(record);
422
// set whether this is a singleton repeat or not:
423
if(rep->next.p->next.p->next.p == rep->alt.p)
425
rep->singleton = true;
428
rep->singleton = false;
433
record->can_be_null = 0;
434
compile_map(record, 0, &(record->can_be_null), re_detail::mask_all);
436
record = record->next.p;
438
record->can_be_null = re_detail::mask_all;
441
template <class charT, class traits, class Allocator>
442
bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start(
443
re_detail::re_syntax_base* node, charT cc, re_detail::re_syntax_base* terminal) const
449
case re_detail::syntax_element_startmark:
450
if(static_cast<const re_detail::re_brace*>(node)->index == -1)
452
return probe_start(node->next.p->next.p, cc, terminal)
453
&& probe_start(static_cast<const re_detail::re_jump*>(node->next.p)->alt.p, cc, terminal);
456
case re_detail::syntax_element_endmark:
457
case re_detail::syntax_element_start_line:
458
case re_detail::syntax_element_word_boundary:
459
case re_detail::syntax_element_buffer_start:
460
case re_detail::syntax_element_restart_continue:
461
// doesn't tell us anything about the next character, so:
462
return probe_start(node->next.p, cc, terminal);
463
case re_detail::syntax_element_literal:
464
// only the first character of the literal can match:
465
// note these have already been translated:
466
if(*reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(node)+1) == traits_inst.translate(cc, (_flags & regbase::icase)))
469
case re_detail::syntax_element_end_line:
470
// next character (if there is one!) must be a newline:
471
if(traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase))))
474
case re_detail::syntax_element_wild:
476
case re_detail::syntax_element_match:
478
case re_detail::syntax_element_within_word:
479
case re_detail::syntax_element_word_start:
480
return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word);
481
case re_detail::syntax_element_word_end:
482
// what follows must not be a word character,
483
return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word) ? false : true;
484
case re_detail::syntax_element_buffer_end:
485
// we can be null, nothing must follow,
486
// NB we assume that this is followed by
487
// re_detail::syntax_element_match, if its not then we can
488
// never match anything anyway!!
490
case re_detail::syntax_element_soft_buffer_end:
491
// we can be null, only newlines must follow,
492
// NB we assume that this is followed by
493
// re_detail::syntax_element_match, if its not then we can
494
// never match anything anyway!!
495
return traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase)));
496
case re_detail::syntax_element_backref:
497
// there's no easy way to determine this
498
// which is not to say it can't be done!
501
case re_detail::syntax_element_long_set:
502
// we can not be null,
503
// we need to add already translated values in the set
504
// to values in the _map
505
return re_detail::re_maybe_set_member(cc, static_cast<const re_detail::re_set_long*>(node), *this) || (re_detail::re_is_set_member(static_cast<const charT*>(&cc), static_cast<const charT*>(&cc+1), static_cast<re_detail::re_set_long*>(node), *this) != &cc);
506
case re_detail::syntax_element_set:
507
// set all the elements that are set in corresponding set:
508
c = (traits_size_type)(traits_uchar_type)traits_inst.translate(cc, (_flags & regbase::icase));
509
return static_cast<re_detail::re_set*>(node)->_map[c] != 0;
510
case re_detail::syntax_element_jump:
511
if(static_cast<re_detail::re_jump*>(node)->alt.p < node)
514
// caused only by end of repeat section, we'll treat this
515
// the same as a match, because the sub-expression has matched.
516
if(node->next.p == terminal)
517
return true; // null repeat - we can always take this
521
// take the jump, add in fix for the fact that if the
522
// repeat that we're jumping to has non-zero minimum count
523
// then we need to add in the possibility that we could still
525
re_detail::re_syntax_base* next = static_cast<re_detail::re_jump*>(node)->alt.p;
526
bool b = probe_start(next, cc, terminal);
527
if((next->type == re_detail::syntax_element_rep) && (static_cast<re_detail::re_repeat*>(next)->min != 0))
529
b = b || probe_start(static_cast<re_detail::re_jump*>(next)->alt.p, cc, terminal);
535
// take the jump and compile:
536
return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);
537
case re_detail::syntax_element_alt:
538
// we need to take the OR of the two alternatives:
539
return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal) || probe_start(node->next.p, cc, terminal);
540
case re_detail::syntax_element_rep:
541
// we need to take the OR of the two alternatives
542
if(static_cast<re_detail::re_repeat*>(node)->min == 0)
543
return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p) || probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);
545
return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p);
546
case re_detail::syntax_element_combining:
547
return !traits_inst.is_combining(traits_inst.translate(cc, (_flags & regbase::icase)));
552
template <class charT, class traits, class Allocator>
553
bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start_null(re_detail::re_syntax_base* node, re_detail::re_syntax_base* terminal)const
557
case re_detail::syntax_element_startmark:
558
case re_detail::syntax_element_endmark:
559
case re_detail::syntax_element_start_line:
560
case re_detail::syntax_element_word_boundary:
561
case re_detail::syntax_element_buffer_start:
562
case re_detail::syntax_element_restart_continue:
563
case re_detail::syntax_element_end_line:
564
case re_detail::syntax_element_word_end:
565
// doesn't tell us anything about the next character, so:
566
return probe_start_null(node->next.p, terminal);
567
case re_detail::syntax_element_match:
568
case re_detail::syntax_element_buffer_end:
569
case re_detail::syntax_element_soft_buffer_end:
570
case re_detail::syntax_element_backref:
572
case re_detail::syntax_element_jump:
573
if(static_cast<re_detail::re_jump*>(node)->alt.p < node)
576
// caused only by end of repeat section, we'll treat this
577
// the same as a match, because the sub-expression has matched.
578
// this is only caused by NULL repeats as in "(a*)*" or "(\<)*"
579
// these are really nonsense and make the matching code much
580
// harder, it would be nice to get rid of them altogether.
581
if(node->next.p == terminal)
584
return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
587
// take the jump and compile:
588
return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
589
case re_detail::syntax_element_alt:
590
// we need to take the OR of the two alternatives:
591
return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal) || probe_start_null(node->next.p, terminal);
592
case re_detail::syntax_element_rep:
593
// only need to consider skipping the repeat:
594
return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
601
template <class charT, class traits, class Allocator>
602
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_map(
603
re_detail::re_syntax_base* node, unsigned char* _map,
604
unsigned int* pnull, unsigned char mask, re_detail::re_syntax_base* terminal)const
608
for(unsigned int i = 0; i < 256; ++i)
610
if(probe_start(node, (charT)i, terminal))
614
if(pnull && probe_start_null(node, terminal))
618
template <class charT, class traits, class Allocator>
619
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::move_offsets(re_detail::re_syntax_base* j, unsigned size)
622
# pragma warning(push)
623
# pragma warning(disable: 4127)
625
// move all offsets starting with j->link forward by size
626
// called after an insert:
627
j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);
632
case re_detail::syntax_element_rep:
633
static_cast<re_detail::re_jump*>(j)->alt.i += size;
636
case re_detail::syntax_element_jump:
637
case re_detail::syntax_element_alt:
638
static_cast<re_detail::re_jump*>(j)->alt.i += size;
645
if(j->next.i == size)
647
j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);
650
# pragma warning(pop)
654
template <class charT, class traits, class Allocator>
655
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_simple(re_detail::re_syntax_base* dat, unsigned long cls, bool isnot)
657
typedef typename re_detail::is_byte<charT>::width_type width_type;
658
re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
659
re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
660
re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
661
re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
666
dat->next.i = data.size();
668
return compile_set_aux(singles, ranges, classes, equivalents, isnot, width_type());
671
template <class charT, class traits, class Allocator>
672
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set(const charT*& first, const charT* last)
674
re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
675
re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
676
re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
677
re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
678
bool has_digraphs = false;
679
jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_open_set);
681
bool started = false;
692
unsigned l = last_none;
693
traits_string_type s;
695
while((first != last) && !done)
697
traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
698
// this is only used for the switch(), but cannot be folded in
699
// due to a bug in Comeau 4.2.44beta3
700
traits_size_type syntax = traits_inst.syntax_type(c);
703
case traits_type::syntax_caret:
704
if(!started && !isnot)
711
goto char_set_literal;
714
case traits_type::syntax_open_set:
716
if((_flags & char_classes) == 0)
719
goto char_set_literal;
721
// check to see if we really have a class:
722
const charT* base = first;
723
// this is only used for the switch(), but cannot be folded in
724
// due to a bug in Comeau 4.2.44beta3
725
unsigned int inner_set = parse_inner_set(first, last);
728
case traits_type::syntax_colon:
735
boost::uint_fast32_t id = traits_inst.lookup_classname(base+2, first-2);
736
if(_flags & regbase::icase)
738
if((id == traits_type::char_class_upper) || (id == traits_type::char_class_lower))
740
id = traits_type::char_class_alpha;
753
case traits_type::syntax_dot:
755
// we have a collating element [.collating-name.]
757
if(traits_inst.lookup_collatename(s, base+2, first-2))
762
if(s.size())goto char_set_literal;
766
case traits_type::syntax_equal:
768
// we have an equivalence class [=collating-name=]
770
if(traits_inst.lookup_collatename(s, base+2, first-2))
772
std::size_t len = s.size();
778
s[i] = traits_inst.translate(s[i], (_flags & regbase::icase));
781
traits_string_type s2;
782
traits_inst.transform_primary(s2, s);
783
equivalents.push(s2);
791
case traits_type::syntax_left_word:
792
if((started == false) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set))
795
return add_simple(0, re_detail::syntax_element_word_start);
799
case traits_type::syntax_right_word:
800
if((started == false) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set))
803
return add_simple(0, re_detail::syntax_element_word_end);
810
unsigned int t = traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+1));
811
if((t != traits_type::syntax_colon) && (t != traits_type::syntax_dot) && (t != traits_type::syntax_equal))
815
goto char_set_literal;
828
case traits_type::syntax_close_set:
832
goto char_set_literal;
836
case traits_type::syntax_dash:
840
goto char_set_literal;
843
if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set)
847
goto char_set_literal;
849
if((singles.empty() == true) || (l != last_single))
854
ranges.push(singles.peek());
855
if(singles.peek().size() <= 1) // leave digraphs and ligatures in place
859
case traits_type::syntax_slash:
860
if(_flags & regbase::escape_in_lists)
865
traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
866
// this is only used for the switch(), but cannot be folded in
867
// due to a bug in Comeau 4.2.44beta3
868
traits_size_type syntax = traits_inst.syntax_type(c);
871
case traits_type::syntax_w:
877
classes.push(traits_type::char_class_word);
882
case traits_type::syntax_d:
888
classes.push(traits_type::char_class_digit);
893
case traits_type::syntax_s:
899
classes.push(traits_type::char_class_space);
904
case traits_type::syntax_l:
910
classes.push(traits_type::char_class_lower);
915
case traits_type::syntax_u:
921
classes.push(traits_type::char_class_upper);
926
case traits_type::syntax_W:
927
case traits_type::syntax_D:
928
case traits_type::syntax_S:
929
case traits_type::syntax_U:
930
case traits_type::syntax_L:
934
c = parse_escape(first, last);
937
goto char_set_literal;
943
goto char_set_literal;
949
// get string length to stop us going past the end of string (DWA)
950
std::size_t len = s.size();
953
s[i] = traits_inst.translate(s[i], (_flags & regbase::icase));
961
if(s.size() > 1) // add ligatures to singles list as well
975
typedef typename re_detail::is_byte<charT>::width_type width_type;
977
re_detail::re_syntax_base* result;
979
result = compile_set_aux(singles, ranges, classes, equivalents, isnot, re_detail::_wide_type());
981
result = compile_set_aux(singles, ranges, classes, equivalents, isnot, width_type());
984
if((result == 0) && (_flags & regbase::use_except))
990
template <class charT, class traits, class Allocator>
991
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<boost::uint_fast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_wide_type&)
993
size_type base = data.size();
994
data.extend(sizeof(re_detail::re_set_long));
995
unsigned int csingles = 0;
996
unsigned int cranges = 0;
997
boost::uint_fast32_t cclasses = 0;
998
unsigned int cequivalents = 0;
999
bool nocollate_state = flags() & regbase::nocollate;
1001
while(singles.empty() == false)
1004
const traits_string_type& s = singles.peek();
1005
std::size_t len = (s.size() + 1) * sizeof(charT);
1006
std::memcpy(reinterpret_cast<charT*>(data.extend(len)), s.c_str(), len);
1009
while(ranges.empty() == false)
1011
traits_string_type c1, c2;
1015
traits_inst.transform(c1, ranges.peek());
1020
traits_inst.transform(c2, ranges.peek());
1024
// for some reason bc5 crashes when throwing exceptions
1025
// from here - probably an EH-compiler bug, but hard to
1027
// delay throw to later:
1029
boost::uint_fast32_t f = _flags;
1030
_flags &= ~regbase::use_except;
1039
std::size_t len = (re_detail::re_strlen(c1.c_str()) + 1) * sizeof(charT);
1040
std::memcpy(data.extend(len), c1.c_str(), len);
1041
len = (re_detail::re_strlen(c2.c_str()) + 1) * sizeof(charT);
1042
std::memcpy(data.extend(len), c2.c_str(), len);
1044
while(classes.empty() == false)
1046
cclasses |= classes.peek();
1049
while(equivalents.empty() == false)
1052
const traits_string_type& s = equivalents.peek();
1053
std::size_t len = (re_detail::re_strlen(s.c_str()) + 1) * sizeof(charT);
1054
std::memcpy(reinterpret_cast<charT*>(data.extend(len)), s.c_str(), len);
1058
re_detail::re_set_long* dat = reinterpret_cast<re_detail::re_set_long*>(reinterpret_cast<unsigned char*>(data.data()) + base);
1059
dat->type = re_detail::syntax_element_long_set;
1060
dat->csingles = csingles;
1061
dat->cranges = cranges;
1062
dat->cclasses = cclasses;
1063
dat->cequivalents = cequivalents;
1069
template <class charT, class traits, class Allocator>
1070
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<boost::uint_fast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_narrow_type&)
1072
re_detail::re_set* dat = reinterpret_cast<re_detail::re_set*>(data.extend(sizeof(re_detail::re_set)));
1073
std::memset(dat, 0, sizeof(re_detail::re_set));
1075
while(singles.empty() == false)
1077
dat->_map[(traits_size_type)(traits_uchar_type)*(singles.peek().c_str())] = re_detail::mask_all;
1080
while(ranges.empty() == false)
1082
traits_string_type c1, c2, c3, c4;
1084
if(flags() & regbase::nocollate)
1087
traits_inst.transform(c1, ranges.peek());
1089
if(flags() & regbase::nocollate)
1092
traits_inst.transform(c2, ranges.peek());
1097
// for some reason bc5 crashes when throwing exceptions
1098
// from here - probably an EH-compiler bug, but hard to
1100
// delay throw to later:
1102
boost::uint_fast32_t f = _flags;
1103
_flags &= ~regbase::use_except;
1111
for(unsigned int i = 0; i < 256; ++i)
1114
if(flags() & regbase::nocollate)
1117
traits_inst.transform(c3, c4);
1118
if((c3 <= c1) && (c3 >= c2))
1119
dat->_map[i] = re_detail::mask_all;
1122
while(equivalents.empty() == false)
1124
traits_string_type c1, c2;
1125
for(unsigned int i = 0; i < 256; ++i)
1128
traits_inst.transform_primary(c1, c2);
1129
if(c1 == equivalents.peek())
1130
dat->_map[i] = re_detail::mask_all;
1135
boost::uint_fast32_t flags = 0;
1136
while(classes.empty() == false)
1138
flags |= classes.peek();
1143
for(unsigned int i = 0; i < 256; ++i)
1145
if(traits_inst.is_class(charT(i), flags))
1146
dat->_map[(traits_uchar_type)traits_inst.translate((charT)i, (_flags & regbase::icase))] = re_detail::mask_all;
1152
for(unsigned int i = 0; i < 256; ++i)
1154
dat->_map[i] = !dat->_map[i];
1158
dat->type = re_detail::syntax_element_set;
1163
#ifndef __CODEGUARD__
1164
// this must not be inline when Borland's codeguard support is turned
1165
// on, otherwise we _will_ get spurious codeguard errors...
1168
re_detail::re_syntax_base* add_offset(void* base, std::ptrdiff_t off)
1170
return reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(base) + off);
1174
// fixup_apply: converts the byte offsets stored in the compiled records
// (`next.i`, `alt.i`) into real pointers (`next.p`, `alt.p`) relative to `b`,
// using add_offset. While doing so it records in `pb` which marked
// sub-expressions have been closed, so that back-references can be checked.
//
// b       - first record of the compiled program (also the base address
//           that all offsets are relative to).
// cbraces - number of entries allocated in the `pb` array; back-reference
//           indexes are compared against it.
//
// NOTE(review): the bare numeric lines below look like line-number residue
// from an extraction step, and short lines (braces, `switch`, `break;`, the
// declaration of `pb`, ...) are not visible in this view -- confirm against
// the upstream header before relying on the exact control flow.
template <class charT, class traits, class Allocator>
1175
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fixup_apply(re_detail::re_syntax_base* b, unsigned cbraces)
1177
// Allocator rebound to bool, used for the `pb` array.
typedef typename boost::detail::rebind_allocator<bool, Allocator>::type b_alloc;
1179
// `base` is the byte address offsets are added to; `ptr` is the cursor.
register unsigned char* base = reinterpret_cast<unsigned char*>(b);
1180
register re_detail::re_syntax_base* ptr = b;
1182
b_alloc a(data.allocator());
1183
#ifndef BOOST_NO_EXCEPTIONS
1187
// One bool per mark; set to true when the matching endmark is reached.
pb = a.allocate(cbraces);
1188
BOOST_REGEX_NOEH_ASSERT(pb)
1189
for(unsigned i = 0; i < cbraces; ++i)
1198
// Repeat record: fix up the `alt` jump target and give the repeat its id.
case re_detail::syntax_element_rep:
1199
jm_assert(data.size() > static_cast<re_detail::re_jump*>(ptr)->alt.i);
1200
static_cast<re_detail::re_jump*>(ptr)->alt.p = add_offset(base, static_cast<re_detail::re_jump*>(ptr)->alt.i);
1201
#ifdef BOOST_REGEX_DEBUG
1202
// Debug only: trace a jump target that is not aligned to padding_mask.
if((re_detail::padding_mask & reinterpret_cast<int>(static_cast<re_detail::re_jump*>(ptr)->alt.p)) && (static_cast<re_detail::re_jump*>(ptr)->alt.p != b))
1204
jm_trace("padding mis-aligment in repeat jump to object type: " << static_cast<re_detail::re_jump*>(ptr)->alt.p->type)
1205
//jm_assert(0 == (padding_mask & (int)((re_detail::re_jump*)ptr)->alt.p));
1208
// Stores the current value of `repeats` as this repeat's id.
static_cast<re_detail::re_repeat*>(ptr)->id = repeats;
1211
// Jump / alternative records: fix up the `alt` target only.
case re_detail::syntax_element_jump:
1212
case re_detail::syntax_element_alt:
1213
jm_assert(data.size() > static_cast<re_detail::re_jump*>(ptr)->alt.i);
1214
static_cast<re_detail::re_jump*>(ptr)->alt.p = add_offset(base, static_cast<re_detail::re_jump*>(ptr)->alt.i);
1215
#ifdef BOOST_REGEX_DEBUG
1216
if((re_detail::padding_mask & reinterpret_cast<int>(static_cast<re_detail::re_jump*>(ptr)->alt.p) && (static_cast<re_detail::re_jump*>(ptr)->alt.p != b)))
1218
jm_trace("padding mis-aligment in alternation jump to object type: " << static_cast<re_detail::re_jump*>(ptr)->alt.p->type)
1219
//jm_assert(0 == (padding_mask & (int)((re_detail::re_jump*)ptr)->alt.p));
1223
// Back-reference: rejected when its index is out of range or refers to a
// mark whose endmark has not been seen yet; `pb` is released on that path.
// NOTE(review): the error report / return that presumably follows the
// deallocate is not visible here.
case re_detail::syntax_element_backref:
1224
if((static_cast<re_detail::re_brace*>(ptr)->index >= (int)cbraces) || (pb[static_cast<re_detail::re_brace*>(ptr)->index] == false) )
1227
a.deallocate(pb, cbraces);
1231
// End of a marked sub-expression: remember that it is now closed.
// Indexes <= 0 are skipped by the check below.
case re_detail::syntax_element_endmark:
1232
if(static_cast<re_detail::re_brace*>(ptr)->index > 0)
1233
pb[static_cast<re_detail::re_brace*>(ptr)->index] = true;
1237
// Fix up the `next` link of the current record.
jm_assert(data.size() > ptr->next.i);
1238
ptr->next.p = add_offset(base, ptr->next.i);
1239
#ifdef BOOST_REGEX_DEBUG
1240
// NOTE(review): this tests next.p for alignment but compares alt.p (through
// a re_jump cast) against b -- looks copied from the jump cases; confirm.
if((re_detail::padding_mask & (int)(ptr->next.p)) && (static_cast<re_detail::re_jump*>(ptr)->alt.p != b))
1242
jm_trace("padding mis-alignment in next record of type " << ptr->next.p->type)
1243
jm_assert(0 == (re_detail::padding_mask & (int)(ptr->next.p)));
1249
// `pb` is released here and again in the exception-enabled section below;
// presumably these are separate exit paths (normal vs. handler) -- the
// surrounding try/catch lines are not visible.
a.deallocate(pb, cbraces);
1251
#ifndef BOOST_NO_EXCEPTIONS
1256
a.deallocate(pb, cbraces);
1263
template <class charT, class traits, class Allocator>
1264
unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expression(const charT* p, const charT* end, flag_type f)
1267
# pragma warning(push)
1268
# pragma warning(disable: 4127)
1271
// strxfrm not working on OpenBSD??
1272
f |= regbase::nocollate;
1275
if(p == expression())
1277
traits_string_type s(p, end);
1278
return set_expression(s.c_str(), s.c_str() + s.size(), f);
1280
typedef typename traits_type::sentry sentry_t;
1281
sentry_t sent(traits_inst);
1284
const charT* base = p;
1287
fail(REG_NOERROR); // clear any error
1292
return error_code();
1295
const charT* ptr = p;
1297
re_detail::jstack<std::size_t, Allocator> mark(64, data.allocator());
1298
re_detail::jstack<int, Allocator> markid(64, data.allocator());
1299
std::size_t last_mark_popped = 0;
1300
register traits_size_type c;
1301
register re_detail::re_syntax_base* dat;
1303
unsigned rep_min = 0;
1304
unsigned rep_max = 0;
1312
if(_flags & regbase::literal)
1316
dat = add_literal(dat, traits_inst.translate(*ptr, (_flags & regbase::icase)));
1323
c = (traits_size_type)(traits_uchar_type)*ptr;
1324
// this is only used for the switch(), but cannot be folded in
1325
// due to a bug in Comeau 4.2.44beta3
1326
traits_size_type syntax = traits_inst.syntax_type(c);
1329
case traits_type::syntax_open_bracket:
1330
if(_flags & bk_parens)
1332
dat = add_literal(dat, (charT)c);
1338
dat = add_simple(dat, re_detail::syntax_element_startmark, sizeof(re_detail::re_brace));
1340
static_cast<re_detail::re_brace*>(dat)->index = marks++;
1341
mark.push(data.index(dat));
1344
// check for perl like (?...) extention syntax
1345
c = (traits_size_type)(traits_uchar_type)*ptr;
1346
if(((_flags & bk_parens) == 0) && (traits_type::syntax_question == traits_inst.syntax_type(c)))
1349
c = (traits_size_type)(traits_uchar_type)*ptr;
1350
// this is only used for the switch(), but cannot be folded in
1351
// due to a bug in Comeau 4.2.44beta3
1352
traits_size_type syntax = traits_inst.syntax_type(c);
1355
case traits_type::syntax_colon:
1356
static_cast<re_detail::re_brace*>(dat)->index = 0;
1362
case traits_type::syntax_equal:
1363
static_cast<re_detail::re_brace*>(dat)->index = -1;
1366
common_forward_assert:
1370
dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1373
// we don't know what value to put here yet,
1374
// use an arbitrarily large value for now
1375
// and check it later:
1376
static_cast<re_detail::re_jump*>(dat)->alt.i = INT_MAX/2;
1377
mark.push(data.size() - re_detail::re_jump_size);
1379
case traits_type::syntax_not:
1380
static_cast<re_detail::re_brace*>(dat)->index = -2;
1383
goto common_forward_assert;
1384
case traits_type::syntax_hash:
1385
// comment just skip it:
1386
static_cast<re_detail::re_brace*>(dat)->index = 0;
1392
c = (traits_size_type)(traits_uchar_type)*ptr;
1393
}while(traits_type::syntax_close_bracket != traits_inst.syntax_type(c));
1398
// error, return to standard parsing and let that handle the error:
1404
case traits_type::syntax_close_bracket:
1405
if(_flags & bk_parens)
1407
dat = add_literal(dat, (charT)c);
1416
dat->next.i = data.size();
1422
return error_code();
1424
// see if we have an empty alternative:
1425
if(mark.peek() == data.index(dat) )
1427
re_detail::re_syntax_base* para = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + mark.peek());
1428
if(para->type == re_detail::syntax_element_jump)
1431
return error_code();
1435
// pop any pushed alternatives and set the target end destination:
1436
dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
1437
while(dat->type == re_detail::syntax_element_jump)
1439
static_cast<re_detail::re_jump*>(dat)->alt.i = data.size();
1444
return error_code();
1446
dat = reinterpret_cast<re_detail::re_jump*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
1449
dat = add_simple(0, re_detail::syntax_element_endmark, sizeof(re_detail::re_brace));
1450
static_cast<re_detail::re_brace*>(dat)->index = markid.peek();
1452
last_mark_popped = mark.peek();
1456
case traits_type::syntax_char:
1457
dat = add_literal(dat, (charT)c);
1460
case traits_type::syntax_slash:
1465
return error_code();
1467
c = (traits_size_type)(traits_uchar_type)*ptr;
1468
// this is only used for the switch(), but cannot be folded in
1469
// due to a bug in Comeau 4.2.44beta3
1470
traits_size_type syntax = traits_inst.syntax_type(c);
1473
case traits_type::syntax_open_bracket:
1474
if(_flags & bk_parens)
1475
goto open_bracked_jump;
1477
case traits_type::syntax_close_bracket:
1478
if(_flags & bk_parens)
1479
goto close_bracked_jump;
1481
case traits_type::syntax_plus:
1482
if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
1485
rep_max = (unsigned)-1;
1489
case traits_type::syntax_question:
1490
if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
1497
case traits_type::syntax_or:
1498
if(((_flags & bk_vbar) == 0) || (_flags & limited_ops))
1500
goto alt_string_jump;
1501
case traits_type::syntax_open_brace:
1502
if( ((_flags & bk_braces) == 0) || ((_flags & intervals) == 0))
1505
// we have {x} or {x,} or {x,y}:
1506
parse_range(ptr, end, rep_min, rep_max);
1509
case traits_type::syntax_digit:
1510
if(_flags & bk_refs)
1513
int i = traits_inst.toi((charT)c);
1516
// we can have \025 which means take char whose
1517
// code is 25 (octal), so parse string:
1518
c = traits_inst.toi(ptr, end, -8);
1522
dat = add_simple(dat, re_detail::syntax_element_backref, sizeof(re_detail::re_brace));
1523
static_cast<re_detail::re_brace*>(dat)->index = i;
1528
case traits_type::syntax_b: // re_detail::syntax_element_word_boundary
1529
dat = add_simple(dat, re_detail::syntax_element_word_boundary);
1532
case traits_type::syntax_B:
1533
dat = add_simple(dat, re_detail::syntax_element_within_word);
1536
case traits_type::syntax_left_word:
1537
dat = add_simple(dat, re_detail::syntax_element_word_start);
1540
case traits_type::syntax_right_word:
1541
dat = add_simple(dat, re_detail::syntax_element_word_end);
1544
case traits_type::syntax_w: //re_detail::syntax_element_word_char
1545
dat = compile_set_simple(dat, traits_type::char_class_word);
1548
case traits_type::syntax_W:
1549
dat = compile_set_simple(dat, traits_type::char_class_word, true);
1552
case traits_type::syntax_d: //re_detail::syntax_element_word_char
1553
dat = compile_set_simple(dat, traits_type::char_class_digit);
1556
case traits_type::syntax_D:
1557
dat = compile_set_simple(dat, traits_type::char_class_digit, true);
1560
case traits_type::syntax_s: //re_detail::syntax_element_word_char
1561
dat = compile_set_simple(dat, traits_type::char_class_space);
1564
case traits_type::syntax_S:
1565
dat = compile_set_simple(dat, traits_type::char_class_space, true);
1568
case traits_type::syntax_l: //re_detail::syntax_element_word_char
1569
dat = compile_set_simple(dat, traits_type::char_class_lower);
1572
case traits_type::syntax_L:
1573
dat = compile_set_simple(dat, traits_type::char_class_lower, true);
1576
case traits_type::syntax_u: //re_detail::syntax_element_word_char
1577
dat = compile_set_simple(dat, traits_type::char_class_upper);
1580
case traits_type::syntax_U:
1581
dat = compile_set_simple(dat, traits_type::char_class_upper, true);
1584
case traits_type::syntax_Q:
1591
return error_code();
1593
if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_slash)
1596
if((ptr != end) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_E))
1600
dat = add_literal(dat, *(ptr-1));
1604
dat = add_literal(dat, *ptr);
1609
case traits_type::syntax_C:
1610
dat = add_simple(dat, re_detail::syntax_element_wild);
1613
case traits_type::syntax_X:
1614
dat = add_simple(dat, re_detail::syntax_element_combining);
1617
case traits_type::syntax_Z:
1618
dat = add_simple(dat, re_detail::syntax_element_soft_buffer_end);
1621
case traits_type::syntax_G:
1622
dat = add_simple(dat, re_detail::syntax_element_restart_continue);
1625
case traits_type::syntax_start_buffer:
1626
dat = add_simple(dat, re_detail::syntax_element_buffer_start);
1629
case traits_type::syntax_end_buffer:
1630
dat = add_simple(dat, re_detail::syntax_element_buffer_end);
1634
c = (traits_size_type)(traits_uchar_type)parse_escape(ptr, end);
1635
dat = add_literal(dat, (charT)c);
1638
dat = add_literal(dat, (charT)c);
1642
case traits_type::syntax_dollar:
1643
dat = add_simple(dat, re_detail::syntax_element_end_line, sizeof(re_detail::re_syntax_base));
1646
case traits_type::syntax_caret:
1647
dat = add_simple(dat, re_detail::syntax_element_start_line, sizeof(re_detail::re_syntax_base));
1650
case traits_type::syntax_dot:
1651
dat = add_simple(dat, re_detail::syntax_element_wild, sizeof(re_detail::re_syntax_base));
1654
case traits_type::syntax_star:
1656
rep_max = (unsigned)-1;
1660
std::ptrdiff_t offset;
1664
return error_code();
1668
case re_detail::syntax_element_endmark:
1669
offset = last_mark_popped;
1671
case re_detail::syntax_element_literal:
1672
if(static_cast<re_detail::re_literal*>(dat)->length > 1)
1675
charT lit = *reinterpret_cast<charT*>(reinterpret_cast<char*>(dat) + sizeof(re_detail::re_literal) + ((static_cast<re_detail::re_literal*>(dat)->length-1)*sizeof(charT)));
1676
--static_cast<re_detail::re_literal*>(dat)->length;
1677
dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT));
1678
static_cast<re_detail::re_literal*>(dat)->length = 1;
1679
*reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(dat)+1) = lit;
1681
offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data());
1683
case re_detail::syntax_element_backref:
1684
case re_detail::syntax_element_long_set:
1685
case re_detail::syntax_element_set:
1686
case re_detail::syntax_element_wild:
1687
case re_detail::syntax_element_combining:
1688
// we're repeating a single item:
1689
offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data());
1693
return error_code();
1696
dat->next.i = data.size();
1697
//unsigned pos = (char*)dat - (char*)data.data();
1699
// add the trailing jump:
1700
dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1701
static_cast<re_detail::re_jump*>(dat)->alt.i = 0;
1703
// now insert the leading repeater:
1704
dat = static_cast<re_detail::re_syntax_base*>(data.insert(offset, re_detail::re_repeater_size));
1705
dat->next.i = (reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data())) + re_detail::re_repeater_size;
1706
dat->type = re_detail::syntax_element_rep;
1707
static_cast<re_detail::re_repeat*>(dat)->alt.i = data.size();
1708
static_cast<re_detail::re_repeat*>(dat)->min = rep_min;
1709
static_cast<re_detail::re_repeat*>(dat)->max = rep_max;
1710
static_cast<re_detail::re_repeat*>(dat)->leading = false;
1711
static_cast<re_detail::re_repeat*>(dat)->greedy = true;
1712
move_offsets(dat, re_detail::re_repeater_size);
1715
// now check to see if we have a non-greedy repeat:
1716
if((ptr != end) && (_flags & (limited_ops | bk_plus_qm | bk_braces)) == 0)
1718
c = (traits_size_type)(traits_uchar_type)*ptr;
1719
if(traits_type::syntax_question == traits_inst.syntax_type(c))
1721
// OK repeat is non-greedy:
1722
static_cast<re_detail::re_repeat*>(dat)->greedy = false;
1726
dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + data.size() - re_detail::re_jump_size);
1727
static_cast<re_detail::re_repeat*>(dat)->alt.i = offset;
1730
case traits_type::syntax_plus:
1731
if(_flags & (bk_plus_qm | limited_ops))
1733
dat = add_literal(dat, (charT)c);
1738
rep_max = (unsigned)-1;
1740
case traits_type::syntax_question:
1741
if(_flags & (bk_plus_qm | limited_ops))
1743
dat = add_literal(dat, (charT)c);
1750
case traits_type::syntax_open_set:
1755
dat->next.i = data.size();
1758
dat = compile_set(ptr, end);
1761
if((_flags & regbase::failbit) == 0)
1763
return error_code();
1766
case traits_type::syntax_or:
1768
if(_flags & (bk_vbar | limited_ops))
1770
dat = add_literal(dat, (charT)c);
1780
// start of pattern can't have empty "|"
1782
return error_code();
1784
// see if we have an empty alternative:
1785
if(mark.empty() == false)
1786
if(mark.peek() == data.index(dat))
1789
return error_code();
1792
dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1795
// we don't know what value to put here yet,
1796
// use an arbitrarily large value for now
1797
// and check it later (TODO!)
1798
static_cast<re_detail::re_jump*>(dat)->alt.i = INT_MAX/2;
1800
// now work out where to insert:
1801
std::size_t offset = 0;
1802
if(mark.empty() == false)
1804
// we have a '(' or '|' to go back to:
1805
offset = mark.peek();
1806
re_detail::re_syntax_base* base = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + offset);
1807
offset = base->next.i;
1809
re_detail::re_jump* j = static_cast<re_detail::re_jump*>(data.insert(offset, re_detail::re_jump_size));
1810
j->type = re_detail::syntax_element_alt;
1811
j->next.i = offset + re_detail::re_jump_size;
1812
j->alt.i = data.size();
1813
move_offsets(j, re_detail::re_jump_size);
1814
dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + data.size() - re_detail::re_jump_size);
1815
mark.push(data.size() - re_detail::re_jump_size);
1819
case traits_type::syntax_open_brace:
1820
if((_flags & bk_braces) || ((_flags & intervals) == 0))
1822
dat = add_literal(dat, (charT)c);
1826
// we have {x} or {x,} or {x,y}:
1827
parse_range(ptr, end, rep_min, rep_max);
1829
case traits_type::syntax_newline:
1830
if(_flags & newline_alt)
1831
goto alt_string_jump;
1832
dat = add_literal(dat, (charT)c);
1835
case traits_type::syntax_close_brace:
1836
if(_flags & bk_braces)
1838
dat = add_literal(dat, (charT)c);
1843
return error_code();
1845
dat = add_literal(dat, (charT)c);
1856
dat->next.i = data.size();
1859
// see if we have an empty alternative:
1860
if(mark.empty() == false)
1861
if(mark.peek() == data.index(dat) )
1863
re_detail::re_syntax_base* para = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + mark.peek());
1864
if(para->type == re_detail::syntax_element_jump)
1867
return error_code();
1873
if(mark.empty() == false)
1875
// pop any pushed alternatives and set the target end destination:
1876
dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
1877
while(dat->type == re_detail::syntax_element_jump)
1879
static_cast<re_detail::re_jump*>(dat)->alt.i = data.size();
1881
if(mark.empty() == true)
1883
dat = reinterpret_cast<re_detail::re_jump*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
1887
dat = static_cast<re_detail::re_brace*>(data.extend(sizeof(re_detail::re_syntax_base)));
1888
dat->type = re_detail::syntax_element_match;
1891
if(mark.empty() == false)
1894
return error_code();
1898
// allocate space for start _map:
1899
startmap = reinterpret_cast<unsigned char*>(data.extend(256 + ((end - base + 1) * sizeof(charT))));
1901
// and copy the expression we just compiled:
1902
_expression = reinterpret_cast<charT*>(reinterpret_cast<char*>(startmap) + 256);
1903
_expression_len = end - base;
1904
std::memcpy(_expression, base, _expression_len * sizeof(charT));
1905
*(_expression + _expression_len) = charT(0);
1908
// now we need to apply fixups to the array
1909
// so that we can use pointers and not indexes
1910
fixup_apply(static_cast<re_detail::re_syntax_base*>(data.data()), marks);
1912
// check for error during fixup:
1913
if(_flags & regbase::failbit)
1914
return error_code();
1917
// finally compile the maps so that we can make intelligent choices
1918
// whenever we encounter an alternative:
1922
re_detail::kmp_free(pkmp, data.allocator());
1925
re_detail::re_syntax_base* sbase = static_cast<re_detail::re_syntax_base*>(data.data());
1926
_restart_type = probe_restart(sbase);
1927
_leading_len = fixup_leading_rep(sbase, 0);
1928
if((sbase->type == re_detail::syntax_element_literal) && (sbase->next.p->type == re_detail::syntax_element_match))
1930
_restart_type = restart_fixed_lit;
1933
charT* p1 = reinterpret_cast<charT*>(reinterpret_cast<char*>(sbase) + sizeof(re_detail::re_literal));
1934
charT* p2 = p1 + static_cast<re_detail::re_literal*>(sbase)->length;
1935
pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator<traits>(_flags®base::icase, &traits_inst), data.allocator());
1938
return error_code();
1944
# pragma warning(pop)
1949
// add_simple: appends a new record of at least sizeof(re_syntax_base) bytes
// to `data` and links the previous record to it.
//
// dat  - the previous record; its `next.i` is set to the current end of
//        `data`, which is where the new record will start.
// type - record type for the new element (the assignment itself is not
//        visible in this view).
// size - requested size in bytes; raised to sizeof(re_syntax_base) if smaller.
//
// NOTE(review): set_expression calls this with dat == 0, so the write to
// dat->next.i is presumably guarded by a null check on a line that is not
// visible here. The bare numeric lines look like line-number residue from an
// extraction step -- confirm against the upstream header.
template <class charT, class traits, class Allocator>
1950
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::add_simple(re_detail::re_syntax_base* dat, re_detail::syntax_element_type type, unsigned int size)
1955
// Link the previous record to the one about to be appended.
dat->next.i = data.size();
1957
// Never allocate less than a bare record header.
if(size < sizeof(re_detail::re_syntax_base))
1958
size = sizeof(re_detail::re_syntax_base);
1959
dat = static_cast<re_detail::re_syntax_base*>(data.extend(size));
1965
// add_literal: appends the character `c` (after traits translation under the
// icase flag) to the compiled program.
//
// If `dat` is already a literal record the character is appended to it and
// its length is incremented; otherwise a new literal record of length 1 is
// created with add_simple.
//
// NOTE(review): the bare numeric lines look like line-number residue from an
// extraction step, and the braces / else / return lines are not visible in
// this view -- confirm against the upstream header.
template <class charT, class traits, class Allocator>
1966
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::add_literal(re_detail::re_syntax_base* dat, charT c)
1968
if(dat && (dat->type == re_detail::syntax_element_literal))
1970
// add another charT to the list:
1971
// Save dat as an offset and rebuild the pointer after extend(); presumably
// extend() may relocate the buffer.
std::ptrdiff_t pos = reinterpret_cast<unsigned char*>(dat) - reinterpret_cast<unsigned char*>(data.data());
1972
*reinterpret_cast<charT*>(data.extend(sizeof(charT))) = traits_inst.translate(c, (_flags & regbase::icase));
1973
dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + pos);
1974
++(static_cast<re_detail::re_literal*>(dat)->length);
1979
// Start a fresh literal record: header followed by room for one charT.
dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT));
1980
static_cast<re_detail::re_literal*>(dat)->length = 1;
1981
// The character is stored immediately after the re_literal header.
*reinterpret_cast<charT*>(reinterpret_cast<re_detail::re_literal*>(dat)+1) = traits_inst.translate(c, (_flags & regbase::icase));
1986
// probe_restart: inspects the record `dat` and returns the regbase restart
// kind that applies to a program beginning with it. Mark records are skipped
// by recursing on the next record; any record type not listed yields
// restart_any.
//
// NOTE(review): the bare numeric lines look like line-number residue from an
// extraction step, and the `switch` / brace lines are not visible in this
// view -- confirm against the upstream header.
template <class charT, class traits, class Allocator>
1987
unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_restart(re_detail::re_syntax_base* dat)
1991
case re_detail::syntax_element_startmark:
1992
case re_detail::syntax_element_endmark:
1993
// Index -2 is what set_expression stores for the traits_type::syntax_not
// form of "(?"; no specific restart kind is used past it.
if(static_cast<const re_detail::re_brace*>(dat)->index == -2)
1994
return regbase::restart_any;
1995
// Otherwise look through the mark at whatever follows it.
return probe_restart(dat->next.p);
1996
case re_detail::syntax_element_start_line:
1997
return regbase::restart_line;
1998
case re_detail::syntax_element_word_start:
1999
return regbase::restart_word;
2000
case re_detail::syntax_element_buffer_start:
2001
return regbase::restart_buf;
2002
case re_detail::syntax_element_restart_continue:
2003
return regbase::restart_continue;
2005
// Fallback for every other record type.
return regbase::restart_any;
2009
// fixup_leading_rep: examines the records starting at `dat` (bounded by
// `end`), accumulating in `len` the length of the literals it sees.
//
// Side effects visible here:
//   * For the first literal longer than two characters, when called with
//     end == 0, it records `_leading_string` / `_leading_string_len`, sets
//     `_restart_type` to restart_lit and builds a KMP table in `pkmp`.
//   * For a repeat record reached while `len` is still 0 whose body reports
//     a length of 1, it stores `leading_lit` into the repeat's `leading`
//     field.
//
// dat - first record to examine.
// end - stop record; set_expression passes 0 for the top-level call, and the
//       recursive call below passes the repeat's `alt.p`.
//
// NOTE(review): the bare numeric lines look like line-number residue from an
// extraction step, and the loop / `switch` / `break` / `return` lines are not
// visible in this view -- confirm against the upstream header.
template <class charT, class traits, class Allocator>
2010
unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fixup_leading_rep(re_detail::re_syntax_base* dat, re_detail::re_syntax_base* end)
2012
unsigned int len = 0;
2013
// Only a call with end == 0 starts out eligible for the leading-literal
// search.
bool leading_lit = end ? false : true;
2018
case re_detail::syntax_element_literal:
2019
len += static_cast<re_detail::re_literal*>(dat)->length;
2020
if((leading_lit) && (static_cast<re_detail::re_literal*>(dat)->length > 2))
2022
// we can do a literal search for the leading literal string
2023
// using Knuth-Morris-Pratt (or whatever), and only then check for
2024
// matches. We need a decent length string though to make it
// worthwhile.
2026
// The literal's characters are stored right after the re_literal header.
_leading_string = reinterpret_cast<charT*>(reinterpret_cast<char*>(dat) + sizeof(re_detail::re_literal));
2027
_leading_string_len = static_cast<re_detail::re_literal*>(dat)->length;
2028
_restart_type = restart_lit;
2029
leading_lit = false;
2030
const charT* p1 = _leading_string;
2031
const charT* p2 = _leading_string + _leading_string_len;
2032
// The translator is built from the icase bit of the flags.
pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator<traits>(_flags&regbase::icase, &traits_inst), data.allocator());
2034
leading_lit = false;
2036
case re_detail::syntax_element_wild:
2038
leading_lit = false;
2040
case re_detail::syntax_element_match:
2042
case re_detail::syntax_element_backref:
2043
//case re_detail::syntax_element_jump:
2044
case re_detail::syntax_element_alt:
2045
case re_detail::syntax_element_combining:
2047
case re_detail::syntax_element_long_set:
2049
// we need to verify that there are no multi-character
2050
// collating elements inside the repeat:
2051
// The set's single-element strings start right after the re_set_long header.
const charT* p = reinterpret_cast<const charT*>(reinterpret_cast<const char*>(dat) + sizeof(re_detail::re_set_long));
2052
unsigned int csingles = static_cast<re_detail::re_set_long*>(dat)->csingles;
2053
for(unsigned int i = 0; i < csingles; ++i)
2055
if(re_detail::re_strlen(p) > 1)
2061
leading_lit = false;
2064
case re_detail::syntax_element_set:
2066
leading_lit = false;
2068
// Repeat record: recurse over its body, bounded by the repeat's alt target.
case re_detail::syntax_element_rep:
2069
if((len == 0) && (1 == fixup_leading_rep(dat->next.p, static_cast<re_detail::re_repeat*>(dat)->alt.p) ))
2071
static_cast<re_detail::re_repeat*>(dat)->leading = leading_lit;
2075
// Index -2 is what set_expression stores for the traits_type::syntax_not
// form of "(?"; the action taken for it is not visible here.
case re_detail::syntax_element_startmark:
2076
if(static_cast<const re_detail::re_brace*>(dat)->index == -2)
2087
// fail: records the error state `err` in the expression's flags.
//
// Visible behaviour: sets regbase::failbit in `_flags`; when exceptions are
// enabled (BOOST_NO_EXCEPTIONS not defined) and regbase::use_except is set,
// throws bad_expression carrying traits_inst.error_string(err). A separate
// path clears failbit again.
//
// NOTE(review): the branch conditions are not visible in this view.
// Presumably failbit is set for a non-zero `err` and cleared otherwise, since
// set_expression calls fail(REG_NOERROR) to "clear any error" -- confirm
// against the upstream header. The bare numeric lines look like line-number
// residue from an extraction step.
template <class charT, class traits, class Allocator>
2088
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fail(unsigned int err)
2093
_flags |= regbase::failbit;
2094
#ifndef BOOST_NO_EXCEPTIONS
2095
// Only throw when the caller opted in through the use_except flag.
if(_flags & regbase::use_except)
2097
throw bad_expression(traits_inst.error_string(err));
2102
_flags &= ~regbase::failbit;
2110
} // namespace boost
2113
#endif // BOOST_REGEX_COMPILE_HPP