3
* Copyright (c) 1998-2002
6
* Permission to use, copy, modify, distribute and sell this software
7
* and its documentation for any purpose is hereby granted without fee,
8
* provided that the above copyright notice appear in all copies and
9
* that both that copyright notice and this permission notice appear
10
* in supporting documentation. Dr John Maddock makes no representations
11
* about the suitability of this software for any purpose.
12
* It is provided "as is" without express or implied warranty.
17
* LOCATION: see http://www.boost.org for most recent version.
18
* FILE c_regex_traits.cpp
19
* VERSION see <boost/version.hpp>
20
* DESCRIPTION: Implements the c_regex_traits<charT> traits class
23
#define BOOST_REGEX_SOURCE
25
#include <boost/config.hpp>
28
# pragma warning(disable: 4702)
36
#include <boost/cregex.hpp>
37
#include <boost/regex/regex_traits.hpp>
38
#include <boost/regex/v3/regex_synch.hpp>
39
#include <boost/regex/v3/regex_cstring.hpp>
40
#include <boost/scoped_array.hpp>
42
#include "primary_transform.hpp"
45
#if defined(BOOST_HAS_NL_TYPES_H)
49
// Fixes a very strange bug in Comeau 4.2.45.2 that would otherwise result in
50
// an instantiation loop
51
#if defined(__COMO__) && __COMO_VERSION__ <= 4245
52
// Deliberately empty: this symbol exists only as the Comeau workaround
// described in the comment above the enclosing compiler-version check.
void c_regex_adopted_no_longer_needed_loop_shutter_upper()
{
}
58
// helper function to get the locale name,
59
// works around possibly broken setlocale implementations:
61
// Returns a name for the current locale of category `id`.
const char* getlocale(int id)
63
// Fallback name. NOTE(review): where it is used is not shown in this
// listing - presumably returned when setlocale yields a null pointer; confirm.
static const char* def = "Unknown";
64
// A null second argument makes setlocale query the current locale for the
// category without changing it.
const char* pl = std::setlocale(id, 0);
71
// Character-class ids indexed in parallel with the class-name tables:
// do_lookup_class returns re_char_class_id[i] for the name found at index i,
// so the order of these entries must stay in step with the names.
boost::uint_fast32_t re_char_class_id[] = {
72
boost::re_detail::c_traits_base::char_class_alnum,
73
boost::re_detail::c_traits_base::char_class_alpha,
74
boost::re_detail::c_traits_base::char_class_cntrl,
75
boost::re_detail::c_traits_base::char_class_digit,
76
boost::re_detail::c_traits_base::char_class_graph,
77
boost::re_detail::c_traits_base::char_class_lower,
78
boost::re_detail::c_traits_base::char_class_print,
79
boost::re_detail::c_traits_base::char_class_punct,
80
boost::re_detail::c_traits_base::char_class_space,
81
boost::re_detail::c_traits_base::char_class_upper,
82
boost::re_detail::c_traits_base::char_class_xdigit,
83
boost::re_detail::c_traits_base::char_class_blank,
84
boost::re_detail::c_traits_base::char_class_word,
85
boost::re_detail::c_traits_base::char_class_unicode,
88
// Built-in class names; do_lookup_class compares them against its argument
// with strcmp and uses the matching index into re_char_class_id.
const char* re_char_class_names[] = {
105
// Locale name the class table was last loaded for; re_update_classes compares
// it with getlocale(LC_CTYPE).
std::string* re_cls_name;
106
// Array of re_classes_max strings allocated by re_init_classes.
std::string* pclasses;
107
// Tested for zero in re_init_classes and pre-decremented in re_free_classes.
unsigned int classes_count = 0;
108
// Number of named character classes; bounds every loop over the class tables.
const unsigned int re_classes_max = 14;
113
// One collating-element entry: a name and the value it maps to.
struct collate_name_t
117
// name is built from the character range [p1, p2) and value from [p3, p4).
collate_name_t(const char* p1, const char* p2, const char* p3, const char* p4)
118
: name(p1, p2), value(p3, p4) {}
121
// Locale name the collate table was last loaded for; re_update_collate
// compares it with getlocale(LC_COLLATE).
std::string* re_coll_name;
122
// List of collating-element entries; allocated in re_init_collate, appended
// to in re_update_collate and searched in c_traits_base::do_lookup_collate.
std::list<collate_name_t>* pcoll_names;
123
// Tested for zero in re_init_collate and pre-decremented in re_free_collate.
unsigned int collate_count = 0;
127
// Default the message base to 0 unless the build has already defined it.
#ifndef BOOST_RE_MESSAGE_BASE
128
#define BOOST_RE_MESSAGE_BASE 0
131
#if defined(BOOST_HAS_NL_TYPES_H)
132
// Handle of the open message catalogue; (nl_catd)-1 is the "not open" value
// that the message functions in this file test against.
nl_catd message_cat = (nl_catd)-1;
135
// Tested for zero in re_message_init and re_message_free.
unsigned int message_count = 0;
136
// Locale name the messages were last loaded for; re_message_update compares
// it with getlocale(LC_MESSAGES).
std::string* mess_locale;
138
// Per-error-id cache of message strings: filled lazily by re_get_error_str
// (via re_strdup) and released with re_strfree when the locale changes.
BOOST_REGEX_DECL char* re_custom_error_messages[] = {
164
// Platforms without LC_MESSAGES fall back to the LC_CTYPE category.
#if !defined(LC_MESSAGES)
165
#define LC_MESSAGES LC_CTYPE
171
// Tested for zero in c_regex_traits<char>::m_free.
unsigned int entry_count = 0;
173
// Locale names last seen by c_regex_traits<char>::update for LC_CTYPE and
// LC_COLLATE respectively.
std::string* ctype_name;
174
std::string* collate_name;
177
// One slot per unsigned char value; sizes syntax_map, class_map and
// lower_case_map.
map_size = UCHAR_MAX + 1
180
// Forward declaration: the wide re_get_message overload calls this before
// the definition appears further down the file.
std::size_t BOOST_REGEX_CALL _re_get_message(char* buf, std::size_t len, std::size_t id);
182
#ifndef BOOST_NO_WREGEX
184
// Wide characters obtained from the "zero" and three-letter collate lookups
// in c_regex_traits<wchar_t>::update; used by the wide toi() for digit
// arithmetic.
BOOST_REGEX_DECL wchar_t re_zero_w;
185
BOOST_REGEX_DECL wchar_t re_ten_w;
187
// Tested for zero in c_regex_traits<wchar_t>::m_free.
unsigned int nlsw_count = 0;
188
// Locale name last seen by c_regex_traits<wchar_t>::update.
std::string* wlocale_name = 0;
196
// Wide-character syntax entries, rebuilt by c_regex_traits<wchar_t>::update
// and scanned linearly by syntax_type.
std::list<syntax_map_t>* syntax;
198
// Wide-character overload: fetches message `id` as narrow text, then widens
// it into buf (capacity len, in wchar_t units).
std::size_t BOOST_REGEX_CALL re_get_message(wchar_t* buf, std::size_t len, std::size_t id)
200
// Null buffer and zero length: the return value is used below as the size of
// the narrow buffer to allocate.
std::size_t size = _re_get_message(static_cast<char*>(0), 0, id);
203
boost::scoped_array<char> cb(new char[size]);
204
_re_get_message(cb.get(), size, id);
205
// Convert the narrow text into the caller's wide buffer.
size = boost::c_regex_traits<wchar_t>::strwiden(buf, len, cb.get());
210
// Narrow overload: forwards unchanged to _re_get_message so that callers can
// use the same name for char and wchar_t buffers.
inline std::size_t BOOST_REGEX_CALL re_get_message(char* buf, std::size_t len, std::size_t id)
212
return _re_get_message(buf, len, id);
215
// Allocates the character-class name storage the first time it is needed.
void BOOST_REGEX_CALL re_init_classes()
218
// Only the first caller (count still zero) performs the allocations.
if(classes_count == 0)
220
// Dummy initial value; re_update_classes compares this string with
// getlocale(LC_CTYPE). NOTE(review): presumably chosen so the first
// comparison never matches a real locale name - confirm.
re_cls_name = new std::string("xxxxxxxx");
221
#ifndef BOOST_NO_EXCEPTIONS
224
pclasses = new std::string[re_classes_max];
225
// NOTE(review): judging by its name this macro checks the allocation when
// exceptions are disabled; its definition is not in this file.
BOOST_REGEX_NOEH_ASSERT(pclasses)
226
#ifndef BOOST_NO_EXCEPTIONS
238
// Counterpart of re_init_classes.
void BOOST_REGEX_CALL re_free_classes()
241
// Pre-decrement: the guarded statements (not shown in this listing) run only
// when the count reaches zero.
if(--classes_count == 0)
248
// Refreshes the class-name data when the LC_CTYPE locale has changed.
void BOOST_REGEX_CALL re_update_classes()
251
// Nothing is reloaded unless the cached locale name is stale.
if(*re_cls_name != getlocale(LC_CTYPE))
253
*re_cls_name = getlocale(LC_CTYPE);
256
for(i = 0; i < re_classes_max; ++i)
258
// Class names are requested under message ids 300 + class index, with a
// length argument of 256.
re_get_message(buf, 256, i+300);
264
// Allocates the collating-element storage the first time it is needed.
void BOOST_REGEX_CALL re_init_collate()
267
// Only the first caller (count still zero) performs the allocations.
if(collate_count == 0)
269
// Dummy initial value; re_update_collate compares this string with
// getlocale(LC_COLLATE). NOTE(review): presumably chosen so the first
// comparison never matches a real locale name - confirm.
re_coll_name = new std::string("xxxxxxxx");
270
#ifndef BOOST_NO_EXCEPTIONS
273
pcoll_names = new std::list<collate_name_t>();
274
// NOTE(review): judging by its name this macro checks the allocation when
// exceptions are disabled; its definition is not in this file.
BOOST_REGEX_NOEH_ASSERT(pcoll_names)
275
#ifndef BOOST_NO_EXCEPTIONS
287
// Counterpart of re_init_collate.
void BOOST_REGEX_CALL re_free_collate()
290
// Pre-decrement: the guarded statements (not shown in this listing) run only
// when the count reaches zero.
if(--collate_count == 0)
297
// Reloads the list of collating-element names when the LC_COLLATE locale has
// changed.
void BOOST_REGEX_CALL re_update_collate()
300
if(*re_coll_name != getlocale(LC_COLLATE))
302
*re_coll_name = getlocale(LC_COLLATE);
304
// Collating-element messages are requested starting at id 400.
unsigned int i = 400;
305
re_get_message(buf, 256, i);
308
// p1..p4 delimit two whitespace-separated tokens in the message text; they
// become the name [p1, p2) and value [p3, p4) of a collate_name_t.
// NOTE(review): the statements that assign the pointers are not shown here.
char* p1, *p2, *p3, *p4;;
310
// Skip whitespace before the first token. The unsigned char cast keeps the
// argument to isspace within the range it is defined for.
while(*p1 && std::isspace((unsigned char)*p1))++p1;
312
// Advance to the end of the first token.
while(*p2 && !std::isspace((unsigned char)*p2))++p2;
314
// Skip the whitespace between the two tokens.
while(*p3 && std::isspace((unsigned char)*p3))++p3;
316
// Advance to the end of the second token.
while(*p4 && !std::isspace((unsigned char)*p4))++p4;
317
pcoll_names->push_back(collate_name_t(p1, p2, p3, p4));
319
// Fetch another message. NOTE(review): the update of i and the loop around
// these statements are not shown here.
re_get_message(buf, 256, i);
324
// Fetches message `id` into buf (capacity len): the open message catalogue is
// consulted first, then the library's built-in default messages.
std::size_t BOOST_REGEX_CALL _re_get_message(char* buf, std::size_t len, std::size_t id)
327
// get the customised message if any:
328
#if defined(BOOST_HAS_NL_TYPES_H)
329
// Only consult the catalogue when one is actually open.
if(message_cat != (nl_catd)-1)
331
// Set number 0; the final argument is the default returned by catgets when
// the message is missing, here a null pointer.
const char* m = catgets(message_cat, 0, id, 0);
334
// NOTE(review): strlen requires m to be non-null; a null check presumably
// sits on a line not shown in this listing - confirm.
std::size_t size = std::strlen(m) + 1;
344
// now get the default message if any:
345
return boost::re_detail::re_get_default_message(buf, len, id);
348
// Sets up message-handling state the first time it is needed.
void BOOST_REGEX_CALL re_message_init()
351
if(message_count == 0)
353
// Dummy initial value; re_message_update compares this string with
// getlocale(LC_MESSAGES). NOTE(review): presumably chosen so the first
// comparison never matches a real locale name - confirm.
mess_locale = new std::string("xxxxxxxxxxxxxxxx");
358
// Re-synchronises the message catalogue and the cached error strings with
// the current LC_MESSAGES locale.
void BOOST_REGEX_CALL re_message_update()
362
// called whenever the global locale changes:
364
std::string l(getlocale(LC_MESSAGES));
365
// Work is done only when the locale name differs from the cached one.
if(*mess_locale != l)
368
#if defined(BOOST_HAS_NL_TYPES_H)
369
// Close any catalogue opened for the previous locale and mark it closed.
if(message_cat != (nl_catd)-1)
371
catclose(message_cat);
372
message_cat = (nl_catd)-1;
374
// A catalogue is opened only if a non-empty catalogue name has been set.
if(*boost::re_detail::c_traits_base::get_catalogue())
376
message_cat = catopen(boost::re_detail::c_traits_base::get_catalogue(), 0);
377
#ifndef BOOST_NO_EXCEPTIONS
378
// With exceptions enabled, failure to open the catalogue is reported by
// throwing std::runtime_error with the catalogue name appended.
if(message_cat == (nl_catd)-1)
380
std::string m("Unable to open message catalog: ");
381
throw std::runtime_error(m + boost::re_detail::c_traits_base::get_catalogue());
384
BOOST_REGEX_NOEH_ASSERT(message_cat != (nl_catd)-1);
388
// Drop every cached error string; re_get_error_str fetches an entry again
// when it finds a null slot.
for(int i = 0; i < boost::REG_E_UNKNOWN; ++i)
390
if(re_custom_error_messages[i])
392
boost::re_detail::re_strfree(re_custom_error_messages[i]);
393
re_custom_error_messages[i] = 0;
399
// Releases message-handling state once message_count is zero.
void BOOST_REGEX_CALL re_message_free()
403
// NOTE(review): the statement that decrements message_count is not shown in
// this listing.
if(message_count == 0)
405
#if defined(BOOST_HAS_NL_TYPES_H)
406
// Close the message catalogue if one is open.
if(message_cat != (nl_catd)-1)
407
catclose(message_cat);
410
// Free every cached error string and null its slot.
for(int i = 0; i < boost::REG_E_UNKNOWN; ++i)
412
if(re_custom_error_messages[i])
414
boost::re_detail::re_strfree(re_custom_error_messages[i]);
415
re_custom_error_messages[i] = 0;
422
// Returns the message text for error code `id`, caching fetched strings in
// re_custom_error_messages.
const char* BOOST_REGEX_CALL re_get_error_str(unsigned int id)
425
#ifdef BOOST_HAS_THREADS
426
// Holds the library-wide lock while the cache is read and updated.
boost::re_detail::cs_guard g(*boost::re_detail::p_re_lock);
428
// A null slot means this message has not been fetched since the cache was
// last cleared.
if(re_custom_error_messages[id] == 0)
431
// Error messages are requested under ids 200 + error code.
_re_get_message(buf, 256, id + 200);
434
// Keep a duplicate of the fetched text in the cache.
re_custom_error_messages[id] = boost::re_detail::re_strdup(buf);
435
return re_custom_error_messages[id];
437
// NOTE(review): the condition selecting the built-in default table is not
// shown in this listing.
return boost::re_detail::re_default_error_messages[id];
439
return re_custom_error_messages[id];
447
// Storage for the message-catalogue name, initially an empty string;
// set_message_catalogue copies a new name into it with strcpy.
char c_traits_base::regex_message_catalogue[BOOST_REGEX_MAX_PATH] = {0};
449
// Returns the message for error code `id` as a std::string, built from the
// C string supplied by re_get_error_str.
std::string BOOST_REGEX_CALL c_traits_base::error_string(unsigned id)
451
return re_get_error_str(id);
454
// Refreshes the characters used for digit arithmetic by looking them up as
// collating-element names.
void BOOST_REGEX_CALL c_traits_base::do_update_collate()
459
const char* p = "zero";
460
// Look up the four-character name "zero".
if(c_regex_traits<char>::lookup_collatename(s, p, p+4))
462
// The lookup must yield exactly one character.
jm_assert(s.size() == 1);
463
re_zero = *s.c_str();
469
// Second lookup uses a three-character name. NOTE(review): the statement
// that re-points p and the assignment of the result are not shown here -
// presumably "ten" stored into re_ten, which toi() reads; confirm.
if(c_regex_traits<char>::lookup_collatename(s, p, p+3))
471
jm_assert(s.size() == 1);
478
// Rebuilds the three per-character tables: syntax_map, class_map and
// lower_case_map.
void BOOST_REGEX_CALL c_traits_base::do_update_ctype()
481
// start by updating the syntax map:
483
char buf[map_size+2];
484
// Every character starts out as an ordinary syntax_char.
std::memset(syntax_map, syntax_char, map_size);
485
for(i = 1; i < syntax_max; ++i)
488
// The characters for syntax type i are requested under message id 100 + i.
re_get_message(static_cast<char*>(buf), map_size, i+100);
491
// Tag the character with its syntax type; the unsigned char cast keeps the
// array index non-negative.
syntax_map[(unsigned char)*ptr] = (unsigned char)i;
495
// now update the character class map,
496
// and lower case map:
497
std::memset(class_map, 0, map_size);
498
for(i = 0; i < map_size; ++i)
501
// Each assignment below ORs one class bit into the entry for character i.
// NOTE(review): the conditions guarding them are not shown in this listing -
// presumably the matching <cctype> predicates; confirm.
class_map[i] |= char_class_alpha;
503
class_map[i] |= char_class_cntrl;
505
class_map[i] |= char_class_digit;
507
class_map[i] |= char_class_lower;
509
class_map[i] |= char_class_upper;
511
class_map[i] |= char_class_punct;
513
class_map[i] |= char_class_space;
515
class_map[i] |= char_class_xdigit;
517
// Classes assigned to fixed characters: '_' is the underscore class, and
// space and tab are the blank class.
class_map[(unsigned char)'_'] |= char_class_underscore;
518
class_map[(unsigned char)' '] |= char_class_blank;
519
class_map[(unsigned char)'\t'] |= char_class_blank;
520
for(i = 0; i < map_size; ++i)
522
// Lower-case mapping for every character value, taken from tolower.
lower_case_map[i] = (char)std::tolower(i);
527
// Maps the class name `p` to its id from re_char_class_id.
boost::uint_fast32_t BOOST_REGEX_CALL c_traits_base::do_lookup_class(const char* p)
531
// First pass over the class indices. NOTE(review): the comparison used here
// is not shown in this listing - presumably against the per-locale names in
// pclasses; confirm.
for(i = 0; i < re_classes_max; ++i)
535
return re_char_class_id[i];
538
// Second pass: compare against the built-in names.
for(i = 0; i < re_classes_max; ++i)
540
if(std::strcmp(re_char_class_names[i], p) == 0)
542
return re_char_class_id[i];
548
// Looks up the collating-element name `p` and writes its value into buf.
bool BOOST_REGEX_CALL c_traits_base::do_lookup_collate(std::string& buf, const char* p)
551
std::list<collate_name_t>::iterator first, last;
552
first = pcoll_names->begin();
553
last = pcoll_names->end();
556
// Linear scan of the loaded entries for a matching name.
if((*first).name == p)
558
buf = (*first).value;
564
// Fall back to the library's built-in collate names.
bool result = re_detail::re_lookup_def_collate_name(buf, p);
565
// Special case when the built-in lookup fails and p is one character long.
// NOTE(review): the statements it guards are not shown in this listing.
if((result == 0) && (std::strlen(p) == 1))
573
// Replaces the stored message-catalogue name with `l`.
std::string BOOST_REGEX_CALL c_traits_base::set_message_catalogue(const std::string& l)
575
// True when l plus its terminating null would not fit in the fixed-size
// array. NOTE(review): the statement this guards is not shown here.
if(sizeof(regex_message_catalogue) <= l.size())
577
// Copy of the previous name, taken before it is overwritten.
std::string old(regex_message_catalogue);
578
std::strcpy(regex_message_catalogue, l.c_str());
582
// Definitions of the per-character tables filled in by do_update_ctype; each
// has one entry per unsigned char value (map_size).
unsigned char c_traits_base::syntax_map[map_size];
583
unsigned short c_traits_base::class_map[map_size];
584
char c_traits_base::lower_case_map[map_size];
586
} // namespace re_detail
588
#ifndef BOOST_NO_WREGEX
589
// Wide-character collating-name lookup: narrows [first, last), performs the
// narrow lookup in the base class, then widens the result for `out`.
bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_collatename(std::basic_string<wchar_t>& out, const wchar_t* first, const wchar_t* last)
592
std::basic_string<wchar_t> s(first, last);
593
// Null buffer and zero length: the return value is used as the size of the
// narrow buffer allocated next.
std::size_t len = strnarrow(static_cast<char*>(0), 0, s.c_str());
594
scoped_array<char> buf(new char[len]);
595
strnarrow(buf.get(), len, s.c_str());
597
bool result = base_type::do_lookup_collate(t_out, buf.get());
598
// An empty value is treated as "not found" whatever the base lookup said.
if(t_out.size() == 0) result = false;
603
// Same size-then-convert sequence in the other direction.
len = strwiden(static_cast<wchar_t*>(0), 0, t_out.c_str());
604
scoped_array<wchar_t> wb(new wchar_t[len]);
605
strwiden(wb.get(), len, t_out.c_str());
609
// Appends a single null wide character to out. NOTE(review): the condition
// under which this happens is not shown in this listing.
out.append(1, (wchar_t)0);
615
// Definition of the static traits instance; update() passes its address to
// find_sort_syntax and m_free() references it so the linker keeps it.
c_regex_traits<char> c_regex_traits<char>::i;
617
// Sets up the state used by the narrow-character traits.
void BOOST_REGEX_CALL c_regex_traits<char>::init()
620
#ifdef BOOST_HAS_THREADS
621
// Threaded builds initialise the thread support first, then hold the
// library-wide lock for the rest of the function.
re_detail::re_init_threads();
622
re_detail::cs_guard g(*re_detail::p_re_lock);
624
// just keep track of entry_count
627
// Dummy initial values; update() compares these strings with the current
// LC_CTYPE and LC_COLLATE locale names. NOTE(review): presumably chosen so
// the first comparison never matches a real locale name - confirm.
ctype_name = new std::string("xxxxxxxxxxxxxxxx");
628
#ifndef BOOST_NO_EXCEPTIONS
631
collate_name = new std::string("xxxxxxxxxxxxxxxx");
632
BOOST_REGEX_NOEH_ASSERT(collate_name)
633
#ifndef BOOST_NO_EXCEPTIONS
648
// Brings the narrow-character traits data up to date with the current
// LC_COLLATE and LC_CTYPE locales.
void BOOST_REGEX_CALL c_regex_traits<char>::update()
651
#ifdef BOOST_HAS_THREADS
652
re_detail::cs_guard g(*re_detail::p_re_lock);
655
// Each category is handled only when its cached locale name is stale; the
// refresh calls themselves are not shown in this listing.
if(*collate_name != getlocale(LC_COLLATE))
658
*collate_name = getlocale(LC_COLLATE);
660
if(*ctype_name != getlocale(LC_CTYPE))
663
*ctype_name = getlocale(LC_CTYPE);
665
// Re-detect the sort-key layout; find_sort_syntax also writes sort_delim.
sort_type = re_detail::find_sort_syntax(&i, &sort_delim);
668
// Releases the state set up by init() once entry_count is zero.
void BOOST_REGEX_CALL c_regex_traits<char>::m_free()
671
#ifdef BOOST_HAS_THREADS
672
re_detail::cs_guard g(*re_detail::p_re_lock);
678
// add reference to static member here to ensure
679
// that the linker includes it in the .exe:
680
// NOTE(review): the statement that decrements entry_count is not shown in
// this listing.
if((entry_count == 0) && (0 != &c_regex_traits<char>::i))
685
#ifdef BOOST_HAS_THREADS
687
// Pairs with the re_init_threads() call in init().
re_detail::re_free_threads();
691
// Produces the locale-specific sort key for `in` using strxfrm; the result
// is intended for `out`.
void BOOST_REGEX_CALL c_regex_traits<char>::transform(std::string& out, const std::string& in)
694
// Zero-size call: strxfrm reports the length of the transformed string
// without writing anything.
std::size_t n = std::strxfrm(0, in.c_str(), 0);
695
// (size_t)-1 is treated as failure; the handling is not shown in this listing.
if(n == (std::size_t)(-1))
700
// One extra element for the terminating null.
scoped_array<char> buf(new char[n+1]);
701
n = std::strxfrm(buf.get(), in.c_str(), n+1);
702
if(n == (std::size_t)(-1))
710
// Reduces a sort key to its primary part according to the layout recorded in
// sort_type.
void BOOST_REGEX_CALL c_regex_traits<char>::transform_primary(std::string& out, const std::string& in)
715
case re_detail::sort_C:
716
case re_detail::sort_unknown:
718
case re_detail::sort_fixed:
719
// Fixed-width layout: sort_delim is used as a position and everything from
// it onwards is erased.
// NOTE(review): the wchar_t overload guards this erase with
// (unsigned)sort_delim < out.size(); here it is unguarded, and
// std::string::erase throws std::out_of_range when the position exceeds
// size(). Consider adding the same guard.
out.erase((int)sort_delim);
721
case re_detail::sort_delim:
722
// Delimited layout: scan for the delimiter character.
for(unsigned int i = 0; i < out.size(); ++i)
724
// Only a delimiter that is not the last character qualifies; the action
// taken is not shown in this listing.
if((out[i] == sort_delim) && (i+1 < out.size()))
733
// Static members set by update() through find_sort_syntax and read by
// transform_primary().
unsigned c_regex_traits<char>::sort_type;
734
char c_regex_traits<char>::sort_delim;
737
// Converts a single digit character to its numeric value, or -1 if c is not
// a digit.
int BOOST_REGEX_CALL c_regex_traits<char>::toi(char c)
739
// Decimal digits; the return statement for this branch is not shown in this
// listing.
if(is_class(c, char_class_digit))
741
// Remaining hexadecimal digits: the offset from re_ten, plus 10. Both
// characters are passed through translate(..., true) before subtraction.
if(is_class(c, char_class_xdigit))
742
return 10 + translate(c, true) - translate(re_ten, true);
743
return -1; // error!!
746
// Parses a number from [first, last) in the given radix, advancing `first`
// (taken by reference) past the digits consumed.
int BOOST_REGEX_CALL c_regex_traits<char>::toi(const char*& first, const char* last, int radix)
751
// if radix is less than zero, then restrict
752
// return value to charT. NB assumes sizeof(charT) <= sizeof(int)
754
// Limit derived from the bit width of one character.
maxval = 1u << (sizeof(*first) * CHAR_BIT - 1);
761
// Otherwise the limit is the largest unsigned int.
maxval = (unsigned int)-1;
765
unsigned int result = 0;
766
// A radix above 10 accepts hexadecimal digits; otherwise decimal only.
unsigned int type = (radix > 10) ? char_class_xdigit : char_class_digit;
767
// Stop at the end of input, at the first non-digit, or once the limit has
// been exceeded.
while((first != last) && is_class(*first, type) && (result <= maxval))
770
// Add the value of the current digit. NOTE(review): the scaling by radix and
// the increment of first are not shown in this listing.
result += toi(*first);
776
#ifndef BOOST_NO_WREGEX
778
// Returns the syntax type recorded for wide character c.
unsigned int BOOST_REGEX_CALL c_regex_traits<wchar_t>::syntax_type(size_type c)
781
std::list<syntax_map_t>::const_iterator first, last;
782
first = syntax->begin();
783
last = syntax->end();
786
// Linear scan of the syntax list; the first entry whose character equals c
// supplies the type.
if((uchar_type)(*first).c == c)
787
return (*first).type;
793
// Sets up the state used by the wide-character traits.
void BOOST_REGEX_CALL c_regex_traits<wchar_t>::init()
796
// NOTE(review): this call sits outside the BOOST_HAS_THREADS guard, whereas
// c_regex_traits<char>::init() makes it inside the guard - confirm that the
// difference is intended.
re_detail::re_init_threads();
797
#ifdef BOOST_HAS_THREADS
798
re_detail::cs_guard g(*re_detail::p_re_lock);
805
// Dummy initial value; update() compares this string with
// getlocale(LC_CTYPE). NOTE(review): presumably chosen so the first
// comparison never matches a real locale name - confirm.
wlocale_name = new std::string("xxxxxxxxxxxxxxxx");
806
#ifndef BOOST_NO_EXCEPTIONS
809
syntax = new std::list<syntax_map_t>();
810
BOOST_REGEX_NOEH_ASSERT(syntax)
811
#ifndef BOOST_NO_EXCEPTIONS
823
// Wide-character wrapper over the base-class collate lookup: narrows
// [first, last), looks it up, then widens the value for `out`.
bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::do_lookup_collate(std::basic_string<wchar_t>& out, const wchar_t* first, const wchar_t* last)
826
std::basic_string<wchar_t> s(first, last);
827
// Null buffer and zero length: the return value is used as the size of the
// narrow buffer allocated next.
std::size_t len = strnarrow(static_cast<char*>(0), 0, s.c_str());
828
scoped_array<char> buf(new char[len]);
829
strnarrow(buf.get(), len, s.c_str());
831
bool result = base_type::do_lookup_collate(t_out, buf.get());
834
// Same size-then-convert sequence in the other direction.
len = strwiden(static_cast<wchar_t*>(0), 0, t_out.c_str());
835
scoped_array<wchar_t> wb(new wchar_t[len]);
836
strwiden(wb.get(), len, t_out.c_str());
843
// Brings the wide-character traits data up to date with the current LC_CTYPE
// locale.
void BOOST_REGEX_CALL c_regex_traits<wchar_t>::update()
846
#ifdef BOOST_HAS_THREADS
847
re_detail::cs_guard g(*re_detail::p_re_lock);
852
std::string l(getlocale(LC_CTYPE));
853
// Work is done only when the locale name differs from the cached one.
if(*wlocale_name != l)
856
std::basic_string<wchar_t> s;
857
const wchar_t* p = L"zero";
858
// Look up the four-character name "zero"; the result must be one character.
if(do_lookup_collate(s, p, p+4))
860
jm_assert(s.size() == 1);
861
re_zero_w = *s.c_str();
867
// Second lookup uses a three-character name. NOTE(review): the statement
// that re-points p is not shown in this listing - presumably L"ten"; confirm.
if(do_lookup_collate(s, p, p+3))
869
jm_assert(s.size() == 1);
870
re_ten_w = *s.c_str();
879
for(i = 1; i < syntax_max; ++i)
882
// The characters for syntax type i are requested under message id 100 + i,
// with a length argument of 256.
re_get_message(static_cast<wchar_t*>(buf), 256, i+100);
887
// Record an entry in the wide syntax list; how sm is filled in is not shown
// in this listing.
syntax->push_back(sm);
890
// Re-detect the sort-key layout; find_sort_syntax also writes sort_delim.
sort_type = re_detail::find_sort_syntax(&init_, &sort_delim);
894
// Releases the state set up by init() once nlsw_count is zero.
void BOOST_REGEX_CALL c_regex_traits<wchar_t>::m_free()
897
#ifdef BOOST_HAS_THREADS
898
re_detail::cs_guard g(*re_detail::p_re_lock);
904
// add reference to static member here to ensure
905
// that the linker includes it in the .exe:
906
// NOTE(review): the statement that decrements nlsw_count is not shown in
// this listing.
if((nlsw_count == 0) && (0 != &c_regex_traits<wchar_t>::init_))
912
#ifdef BOOST_HAS_THREADS
914
// Pairs with the re_init_threads() call in init().
re_detail::re_free_threads();
918
// Tests whether wide character c belongs to any class selected in mask f.
bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::do_iswclass(wchar_t c, boost::uint_fast32_t f)
922
// Table-driven answer from wide_unicode_classes. NOTE(review): the condition
// limiting this path is not shown in this listing - presumably a range check
// on c; confirm.
return BOOST_REGEX_MAKE_BOOL(re_detail::wide_unicode_classes[(uchar_type)c] & f);
923
// Otherwise each requested class bit is tested with the matching <cwctype>
// predicate; the statements these conditions guard are not shown.
if((f & char_class_alpha) && std::iswalpha(c))
925
if((f & char_class_cntrl) && std::iswcntrl(c))
927
if((f & char_class_digit) && std::iswdigit(c))
929
if((f & char_class_lower) && std::iswlower(c))
931
if((f & char_class_punct) && std::iswpunct(c))
933
if((f & char_class_space) && std::iswspace(c))
935
if((f & char_class_upper) && std::iswupper(c))
937
if((f & char_class_xdigit) && std::iswxdigit(c))
939
// The unicode class has no <cwctype> predicate paired with it here.
if(f & char_class_unicode)
944
// Produces the locale-specific sort key for `in` using wcsxfrm; the result
// is intended for `out`.
void BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform(std::basic_string<wchar_t>& out, const std::basic_string<wchar_t>& in)
948
// Zero-size call: wcsxfrm reports the length of the transformed string
// without writing anything.
std::size_t n = std::wcsxfrm(0, in.c_str(), 0);
950
// broken wcsxfrm under VC6 doesn't check size of
951
// output buffer, we have no choice but to guess!
952
// NOTE(review): n is declared twice in this listing; the two declarations
// are presumably alternative branches of a preprocessor conditional whose
// directives are not shown - confirm.
std::size_t n = 100 * in.size();
954
// Both (size_t)-1 and 0 are special-cased; the handling is not shown.
if((n == (std::size_t)(-1)) || (n == 0))
959
// One extra element for the terminating null.
scoped_array<wchar_t> buf(new wchar_t[n+1]);
960
n = std::wcsxfrm(buf.get(), in.c_str(), n+1);
961
if(n == (std::size_t)(-1))
969
// Reduces a wide sort key to its primary part according to the layout
// recorded in sort_type.
void BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform_primary(std::basic_string<wchar_t>& out, const std::basic_string<wchar_t>& in)
974
case re_detail::sort_C:
975
case re_detail::sort_unknown:
977
case re_detail::sort_fixed:
978
// Fixed-width layout: sort_delim is used as a position. The size check keeps
// erase from being called with a position at or beyond the end of out.
if((unsigned)sort_delim < out.size())
979
out.erase((int)sort_delim);
981
case re_detail::sort_delim:
982
// Delimited layout: scan for the delimiter character.
for(unsigned int i = 0; i < out.size(); ++i)
984
// Only a delimiter that is not the last character qualifies; the action
// taken is not shown in this listing.
if((out[i] == sort_delim) && (i+1 < out.size()))
993
// Static members set by update() through find_sort_syntax and read by
// transform_primary().
unsigned c_regex_traits<wchar_t>::sort_type;
994
wchar_t c_regex_traits<wchar_t>::sort_delim;
997
// Converts a single wide digit character to its numeric value, or -1 if c is
// not a digit.
int BOOST_REGEX_CALL c_regex_traits<wchar_t>::toi(wchar_t c)
999
// Decimal digits: offset from the locale's zero character.
if(is_class(c, char_class_digit))
1000
return c - re_zero_w;
1001
// Remaining hexadecimal digits: the offset from re_ten_w, plus 10. Both
// characters are passed through translate(..., true) before subtraction.
if(is_class(c, char_class_xdigit))
1002
return 10 + translate(c, true) - translate(re_ten_w, true);
1003
return -1; // error!!
1006
// Parses a number from [first, last) in the given radix, advancing `first`
// (taken by reference) past the digits consumed.
int BOOST_REGEX_CALL c_regex_traits<wchar_t>::toi(const wchar_t*& first, const wchar_t* last, int radix)
1008
unsigned int maxval;
1011
// if radix is less than zero, then restrict
1012
// return value to charT. NB assumes sizeof(charT) <= sizeof(int)
1014
// Limit derived from the bit width of one character.
maxval = 1u << (sizeof(*first) * CHAR_BIT - 1);
1021
// Otherwise the limit is the largest unsigned int.
maxval = (unsigned int)-1;
1025
unsigned int result = 0;
1026
// A radix above 10 accepts hexadecimal digits; otherwise decimal only.
unsigned int type = (radix > 10) ? char_class_xdigit : char_class_digit;
1027
// Stop at the end of input, at the first non-digit, or once the limit has
// been exceeded.
while((first != last) && is_class(*first, type) && (result <= maxval))
1030
// Add the value of the current digit. NOTE(review): the scaling by radix and
// the increment of first are not shown in this listing.
result += toi(*first);
1036
// Looks up the character-class name held in the wide range [first, last) by
// narrowing it and calling do_lookup_class.
boost::uint_fast32_t BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_classname(const wchar_t* first, const wchar_t* last)
1038
std::basic_string<wchar_t> s(first, last);
1039
// Null buffer and zero length: the return value is used as the size of the
// narrow buffer allocated next.
std::size_t len = strnarrow(static_cast<char*>(0), 0, s.c_str());
1040
scoped_array<char> buf(new char[len]);
1041
strnarrow(buf.get(), len, s.c_str());
1042
boost::uint_fast32_t result = do_lookup_class(buf.get());
1046
// Definition of the static traits instance; update() passes its address to
// find_sort_syntax and m_free() references it so the linker keeps it.
c_regex_traits<wchar_t> c_regex_traits<wchar_t>::init_;
1048
// Converts the wide string s2 to a multibyte string in s1 (capacity len).
// Callers in this file first pass a null s1 with len 0 and use the return
// value as the buffer size to allocate.
std::size_t BOOST_REGEX_CALL c_regex_traits<wchar_t>::strnarrow(char *s1, std::size_t len, const wchar_t *s2)
1050
BOOST_RE_GUARD_STACK
1051
// Length of s2 including its terminator. NOTE(review): the statement that
// uses this value is not shown in this listing - presumably an early return
// when len is too small; confirm.
std::size_t size = std::wcslen(s2) + 1;
1054
return std::wcstombs(s1, s2, len);
1057
// Converts the multibyte string s2 to a wide string in s1 (capacity len, in
// wchar_t units). Callers in this file first pass a null s1 with len 0 and
// use the return value as the buffer size to allocate.
std::size_t BOOST_REGEX_CALL c_regex_traits<wchar_t>::strwiden(wchar_t *s1, std::size_t len, const char *s2)
1059
BOOST_RE_GUARD_STACK
1060
// Length of s2 including its terminator. NOTE(review): the statement that
// uses this value before it is overwritten is not shown in this listing.
std::size_t size = std::strlen(s2) + 1;
1063
size = std::mbstowcs(s1, s2, len);
1068
#endif // BOOST_NO_WREGEX
1070
} // namespace boost