1
/*********************************************************
2
* Copyright (C) 2008 VMware, Inc. All rights reserved.
4
* This program is free software; you can redistribute it and/or modify it
5
* under the terms of the GNU Lesser General Public License as published
6
* by the Free Software Foundation version 2.1 and no later version.
8
* This program is distributed in the hope that it will be useful, but
9
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
10
* or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public
11
* License for more details.
13
* You should have received a copy of the GNU Lesser General Public License
14
* along with this program; if not, write to the Free Software Foundation, Inc.,
15
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
17
*********************************************************/
22
* A string wrapper for bora/lib/unicode. This class is intended to provide
23
* more C++ features such as operator overloading and automatic string conversion
24
* between different types of string classes.
38
* Initialize static scope variables.
40
* Note that with the way this is done, it's important not to delay load glib
41
* libraries. See bug 397373 for more details. If you're getting crazy values
42
* for utf::string::npos, check your linker flags.
44
// Sentinel "no position" value. Defined as Glib::ustring::npos so that results
// of the wrapped Glib::ustring search calls can be compared against it
// directly. Also used as the "not yet computed" marker for mUtf16Length.
const string::size_type string::npos = Glib::ustring::npos;
48
*-----------------------------------------------------------------------------
50
* utf::string::string --
60
*-----------------------------------------------------------------------------
72
*-----------------------------------------------------------------------------
74
* utf::string::string --
84
*-----------------------------------------------------------------------------
87
string::string(ConstUnicode s) // IN
93
mUstr = Unicode_GetUTF8(s);
94
ASSERT(Validate(mUstr));
101
*-----------------------------------------------------------------------------
103
* utf::string::init_bstr_t --
105
* Utility function to construct from a _bstr_t object.
106
* Copies the UTF-16 representation of the _bstr_t.
112
* Makes a copy of the _bstr_t data and frees that data when
113
* the utf::string is destroyed.
118
*-----------------------------------------------------------------------------
122
string::init_bstr_t(const _bstr_t &s) // IN
124
// If the input is empty, then there's nothing to do.
125
if (s.length() == 0) {
129
Unicode utf8 = Unicode_AllocWithUTF16(static_cast<const utf16_t *>(s));
139
ASSERT(Validate(mUstr));
144
*-----------------------------------------------------------------------------
146
* utf::string::string --
148
* Constructor from a ubstr_t object. Copies the UTF-16 representation of
155
* Makes a copy of the ubstr_t data and frees that data when the
156
* utf::string is destroyed.
161
*-----------------------------------------------------------------------------
164
string::string(const ubstr_t &s) // IN
169
// If the input is empty, then there's nothing to do.
170
if (s.length() == 0) {
174
mUstr = static_cast<const char *>(s);
175
ASSERT(Validate(mUstr));
180
*-----------------------------------------------------------------------------
182
* utf::string::string --
184
* Constructor from a _bstr_t object. Copies the UTF-16 representation of
185
* the _bstr_t. Needed for dealing with _com_error::Description().
191
* Makes a copy of the _bstr_t data and frees that data when
192
* the utf::string is destroyed.
197
*-----------------------------------------------------------------------------
200
string::string(const _bstr_t &s) // IN
210
*-----------------------------------------------------------------------------
212
* utf::string::string --
214
* Constructor from a uvariant_t object. Copies the UTF-16 representation
215
* of the ubstr_t interface.
221
* Makes a copy of the uvariant_t data and frees that data when the
222
* utf::string is destroyed.
227
*-----------------------------------------------------------------------------
230
string::string(const uvariant_t &v) // IN
240
Warning("Invalid uvariant_t to ubstr_t conversion.\n");
244
// If the input is empty, then there's nothing to do.
245
if (s.length() == 0) {
249
mUstr = static_cast<const char *>(s);
250
ASSERT(Validate(mUstr));
255
*-----------------------------------------------------------------------------
257
* utf::string::string --
259
* Constructor from a _variant_t object. Copies the UTF-16 representation
266
* Makes a copy of the _variant_t data and frees that data when
267
* the utf::string is destroyed.
272
*-----------------------------------------------------------------------------
275
string::string(const _variant_t &v) // IN
285
Warning("Invalid _variant_t to _bstr_t conversion.\n");
296
*-----------------------------------------------------------------------------
298
* utf::string::string --
308
*-----------------------------------------------------------------------------
311
string::string(const utf16string &s) // IN
316
// If the input is empty, then there's nothing to do.
321
string copy(s.c_str());
327
*-----------------------------------------------------------------------------
329
* utf::string::string --
339
*-----------------------------------------------------------------------------
342
string::string(const utf16_t *s) // IN
350
* Since we already have a UTF-16 representation of the string, copy it
353
mUtf16Cache = Unicode_UTF16Strdup(s);
355
Unicode utf8 = Unicode_AllocWithUTF16(s);
365
ASSERT(Validate(mUstr));
370
*-----------------------------------------------------------------------------
372
* utf::string::string --
382
*-----------------------------------------------------------------------------
385
string::string(const char *s, // IN
386
StringEncoding encoding) // IN
393
Unicode utf8 = Unicode_Alloc(s, encoding);
403
ASSERT(Validate(mUstr));
408
*-----------------------------------------------------------------------------
410
* utf::string::string --
414
* XXX: When initializing mUstr, we do a deep copy of the string data
415
* instead of just calling mUstr(s). This is because Glib::ustring is very
416
* smart about sharing storage, and zero_clear is very dumb. Once we get
417
* rid of zero_clear and have a separate sensitive-string class, this can
418
* go back to being simple.
426
*-----------------------------------------------------------------------------
429
string::string(const Glib::ustring &s) // IN
439
*-----------------------------------------------------------------------------
441
* utf::string::string --
445
* XXX: When initializing mUstr, we do a deep copy of the string data
446
* instead of just calling mUstr(s). This is because Glib::ustring is very
447
* smart about sharing storage, and zero_clear is very dumb. Once we get
448
* rid of zero_clear and have a separate sensitive-string class, this can
449
* go back to being simple.
457
*-----------------------------------------------------------------------------
460
string::string(const string &s) // IN
461
: mUstr(s.mUstr.c_str()),
469
*-----------------------------------------------------------------------------
471
* utf::string::~string --
481
*-----------------------------------------------------------------------------
491
*-----------------------------------------------------------------------------
493
* utf::string::operator Glib::ustring --
495
* Implicit conversion to Glib::ustring operator
498
* The internal Glib::ustring object.
503
*-----------------------------------------------------------------------------
506
string::operator const Glib::ustring& ()
516
*-----------------------------------------------------------------------------
518
* utf::string::operator ubstr_t --
520
* Implicit conversion to ubstr_t
523
* The current ubstr_t string. NUL-terminated.
529
* This function is only defined in _WIN32
531
*-----------------------------------------------------------------------------
534
string::operator const ubstr_t()
537
return ubstr_t(GetUtf16Cache());
544
*-----------------------------------------------------------------------------
546
* utf::string::operator= --
548
* Assignment operator.
551
* A reference to this string.
556
*-----------------------------------------------------------------------------
560
string::operator=(string copy) // IN
568
*-----------------------------------------------------------------------------
570
* utf::string::operator+= --
572
* Append operator of the utf::string class.
575
* A reference to this string.
580
*-----------------------------------------------------------------------------
584
string::operator+=(const string &s) // IN
591
string::operator+=(value_type uc) // IN
599
*-----------------------------------------------------------------------------
601
* utf::string::swap --
603
* Swaps the contents with a given utf::string.
611
*-----------------------------------------------------------------------------
615
string::swap(string &s) // IN/OUT
618
std::swap(mUtf16Cache, s.mUtf16Cache);
619
std::swap(mUtf16Length, s.mUtf16Length);
624
*-----------------------------------------------------------------------------
626
* utf::string::resize --
628
* Change the size of this utf::string.
636
*-----------------------------------------------------------------------------
640
string::resize(size_type n, // IN
641
value_type c) // IN/OPT
649
*-----------------------------------------------------------------------------
651
* utf::string::c_str --
653
* Get the UTF-8 representation of this string.
656
* The current string with UTF-8 encoding. NUL-terminated.
661
*-----------------------------------------------------------------------------
668
return mUstr.c_str();
673
*-----------------------------------------------------------------------------
675
* utf::string::w_str --
677
* Get the UTF-16 representation of this string.
680
* The current string with UTF-16 (host-endian) encoding. NUL-terminated.
685
*-----------------------------------------------------------------------------
692
return GetUtf16Cache();
697
*-----------------------------------------------------------------------------
699
* utf::string::ustr --
701
* Get the Glib::ustring backing of this string.
704
* The internal Glib::ustring object.
709
*-----------------------------------------------------------------------------
721
*-----------------------------------------------------------------------------
723
* utf::string::empty --
725
* Test if this is an empty string.
728
* true if it's an empty string, otherwise false.
733
*-----------------------------------------------------------------------------
740
return mUstr.empty();
745
*-----------------------------------------------------------------------------
747
* utf::string::size --
750
* Returns the length of this string, in characters (code points),
756
*-----------------------------------------------------------------------------
768
*-----------------------------------------------------------------------------
770
* utf::string::w_size --
773
* Returns the length of this string, in UTF-16 code units,
779
*-----------------------------------------------------------------------------
786
if (mUtf16Length == npos) {
787
mUtf16Length = Unicode_UTF16Strlen(GetUtf16Cache());
795
*-----------------------------------------------------------------------------
797
* utf::string::length --
800
* Returns the length of this string, in characters (code points),
801
* excluding NUL. (Same as size().)
806
*-----------------------------------------------------------------------------
818
*-----------------------------------------------------------------------------
820
* utf::string::bytes --
823
* Returns the number of bytes used by the UTF-8 representation of this
824
* string, excluding NUL.
829
*-----------------------------------------------------------------------------
836
return mUstr.bytes();
841
*-----------------------------------------------------------------------------
843
* utf::string::foldCase --
845
* Returns the case-folded string of this string.
848
* The newly created string.
853
*-----------------------------------------------------------------------------
860
return string(mUstr.casefold());
865
*-----------------------------------------------------------------------------
867
* utf::string::trim --
869
* Returns the whitespace-trimmed version of this string.
872
* The newly created string.
877
*-----------------------------------------------------------------------------
884
Unicode trim = Unicode_Trim(c_str());
892
*-----------------------------------------------------------------------------
894
* utf::string::trimLeft --
896
* Get the left-trimmed version of this string.
899
* The newly created string.
904
*-----------------------------------------------------------------------------
911
Unicode trim = Unicode_TrimLeft(c_str());
919
*-----------------------------------------------------------------------------
921
* utf::string::trimRight --
923
* Get the right-trimmed version of this string.
926
* The newly created string.
931
*-----------------------------------------------------------------------------
938
Unicode trim = Unicode_TrimRight(c_str());
946
*-----------------------------------------------------------------------------
948
* utf::string::normalize --
950
* Creates a new string by normalizing the input string.
953
* The newly created string.
958
*-----------------------------------------------------------------------------
962
string::normalize(NormalizeMode mode) // IN
965
return mUstr.normalize((Glib::NormalizeMode)mode);
970
*-----------------------------------------------------------------------------
972
* utf::string::toLower --
974
* Creates a new string by lower-casing the input string using
975
* the rules of the specified locale.
978
* The newly created string.
983
*-----------------------------------------------------------------------------
987
string::toLower(const char *locale) // IN
991
Unicode lower = Unicode_ToLower(c_str(), locale);
992
string results(lower);
995
string results(mUstr.lowercase());
1003
*-----------------------------------------------------------------------------
1005
* utf::string::toUpper --
1007
* Creates a new string by upper-casing the input string using
1008
* the rules of the specified locale.
1011
* The newly created string.
1016
*-----------------------------------------------------------------------------
1020
string::toUpper(const char *locale) // IN
1024
Unicode upper = Unicode_ToUpper(c_str(), locale);
1025
string results(upper);
1026
Unicode_Free(upper);
1028
string results(mUstr.uppercase());
1039
*-----------------------------------------------------------------------------
1041
* utf::string::toTitle --
1043
* Creates a new string by title-casing the input string using
1044
* the rules of the specified locale.
1047
* The newly created string.
1052
*-----------------------------------------------------------------------------
1056
string::toTitle(const char *locale) // IN
1059
Unicode title = Unicode_ToTitle(c_str(), locale);
1060
string results(title);
1061
Unicode_Free(title);
1070
*-----------------------------------------------------------------------------
1072
* utf::string::append --
1074
* Appends the argument string to this utf::string.
1077
* A reference to this object.
1082
*-----------------------------------------------------------------------------
1086
string::append(const string &s) // IN
1089
mUstr.append(s.mUstr);
1096
string::append(const string &s, // IN
1101
mUstr.append(s.mUstr, i, n);
1108
string::append(const char *s, // IN
1119
*-----------------------------------------------------------------------------
1121
* utf::string::push_back --
1123
* Appends the character at the end of this string.
1131
*-----------------------------------------------------------------------------
1135
string::push_back(value_type uc) // IN
1138
mUstr.push_back(uc);
1143
*-----------------------------------------------------------------------------
1145
* utf::string::assign --
1147
* Assigns the passed in string to this string.
1149
* Callers should prefer using operator= instead of assign().
1152
* A reference to this object
1157
*-----------------------------------------------------------------------------
1161
string::assign(const string &s) // IN
1163
return operator=(s);
1168
*-----------------------------------------------------------------------------
1170
* utf::string::insert --
1172
* Inserts the argument string to this string at index i, return this
1175
* These are passthrough calls to the Glib::insert calls.
1178
* A reference to this object
1183
*-----------------------------------------------------------------------------
1187
string::insert(size_type i, // IN
1188
const string &s) // IN
1191
mUstr.insert(i, s.mUstr);
1197
string::insert(size_type i, // IN
1202
mUstr.insert(i, n, uc);
1208
*-----------------------------------------------------------------------------
1210
* utf::string::clear --
1212
* Clears this string.
1220
*-----------------------------------------------------------------------------
1232
*-----------------------------------------------------------------------------
1234
* utf::string::zero_clear --
1236
* Zeroes and clears this string.
1238
* XXX: This is temporary until we have a separate string class for
1247
*-----------------------------------------------------------------------------
1251
string::zero_clear()
1253
if (mUtf16Cache != NULL) {
1254
Util_ZeroFree(mUtf16Cache,
1255
Unicode_UTF16Strlen(mUtf16Cache) * sizeof *mUtf16Cache);
1260
* This is a best effort. We aren't guaranteed that Glib::ustring doesn't
1261
* leave behind any internal copies of the string.
1263
if (mUstr.c_str() != mUstr.data()) {
1264
Util_Zero(const_cast<char *>(mUstr.c_str()), mUstr.bytes());
1266
Util_Zero(const_cast<char *>(mUstr.data()), mUstr.bytes());
1272
*-----------------------------------------------------------------------------
1274
* utf::string::erase --
1276
* Erase the contents of this string in the specified index range.
1279
* A reference to this object
1284
*-----------------------------------------------------------------------------
1288
string::erase(size_type i, // IN
1298
*-----------------------------------------------------------------------------
1300
* utf::string::erase --
1302
* Erase the contents of this string with given iterator.
1305
* The current iterator.
1310
*-----------------------------------------------------------------------------
1314
string::erase(iterator p) // IN
1317
return mUstr.erase(p);
1322
string::erase(iterator pbegin, // IN
1323
iterator pend) // IN
1326
return mUstr.erase(pbegin, pend);
1330
*-----------------------------------------------------------------------------
1332
* utf::string::replace --
1334
* Replace the string contents specified by the range, with the passed in
1338
* A reference to this object.
1343
*-----------------------------------------------------------------------------
1347
string::replace(size_type i, // IN
1349
const string &s) // IN
1352
mUstr.replace(i, n, s.mUstr);
1358
*-----------------------------------------------------------------------------
1360
* utf::string::replace --
1362
* Mutates this string by replacing all occurrences of one string with
1366
* A reference to this object.
1371
*-----------------------------------------------------------------------------
1375
string::replace(const string &from, // IN
1376
const string &to) // IN
1379
size_type start = 0;
1380
size_type fromSize = from.length();
1383
while ((end = find(from, start)) != string::npos) {
1384
result += substr(start, end - start);
1387
start = end + fromSize;
1390
if (start < length()) {
1391
result += substr(start);
1400
*-----------------------------------------------------------------------------
1402
* utf::string::replace_copy --
1405
* Returns a new string with all occurrences of one string replaced by
1411
*-----------------------------------------------------------------------------
1415
string::replace_copy(const string& from, // IN
1416
const string& to) // IN
1419
return string(*this).replace(from, to);
1424
*-----------------------------------------------------------------------------
1426
* utf::string::compare --
1428
* A 3-way (output -1, 0, or 1) string comparison. Compares each Unicode
1429
* code point of this string to the argument string.
1432
* -1 if *this < s, 0 if *this == s, 1 if *this > s.
1437
*-----------------------------------------------------------------------------
1441
string::compare(const string &s, // IN
1442
bool ignoreCase) // IN/OPT: false by default
1446
? Unicode_CompareIgnoreCase(c_str(), s.c_str())
1447
: Unicode_Compare(c_str(), s.c_str());
1452
string::compare(size_type i, // IN
1454
const string &s) // IN
1457
return mUstr.compare(i, n, s.mUstr);
1462
*-----------------------------------------------------------------------------
1464
* utf::string::compareLength --
1466
* A 3-way (output -1, 0, or 1) string comparison with given length.
1467
* Compares only the first len characters (in code points) of the strings.
1470
* -1 if *this < s, 0 if *this == s, 1 if *this > s.
1475
*-----------------------------------------------------------------------------
1479
string::compareLength(const string &s, // IN
1480
size_type len, // IN: length in code-point
1481
bool ignoreCase) // IN/OPT: false by default
1484
return substr(0, len).compare(s.substr(0, len), ignoreCase);
1489
*-----------------------------------------------------------------------------
1491
* utf::string::compareRange --
1493
* A 3-way (output -1, 0, or 1) string comparison with given length.
1494
* Compares the substrings from this string [thisStart ~ thisStart + thisLength-1]
1495
* with the input string str [strStart ~ strStart + strLength - 1].
1498
* -1 if *this < s, 0 if *this == s, 1 if *this > s.
1503
*-----------------------------------------------------------------------------
1507
string::compareRange(size_type thisStart, // IN: index in code-point
1508
size_type thisLength, // IN: length in code-point
1509
const string &str, // IN
1510
size_type strStart, // IN: index in code-point
1511
size_type strLength, // IN: length in code-point
1512
bool ignoreCase) // IN/OPT: false by default
1515
return substr(thisStart, thisLength).compare(str.substr(strStart, strLength), ignoreCase);
1520
*-----------------------------------------------------------------------------
1522
* utf::string::find --
1524
* Searches for the first occurrence of the input string inside this string.
1527
* If s is found, then, it returns the first starting index of the input string.
1528
* Otherwise, returns npos.
1533
*-----------------------------------------------------------------------------
1537
string::find(const string &s, // IN
1538
size_type pos) // IN/OPT
1541
return mUstr.find(s.mUstr, pos);
1546
string::find(value_type uc, // IN
1547
size_type pos) // IN/OPT
1550
return mUstr.find(uc, pos);
1555
*-----------------------------------------------------------------------------
1557
* utf::string::rfind --
1559
* Searches for the last occurrence of the input string inside this string.
1562
* If s is found, then, it returns the last starting index of the input string.
1563
* Otherwise, returns npos.
1568
*-----------------------------------------------------------------------------
1572
string::rfind(const string &s, // IN
1573
size_type pos) // IN/OPT
1576
return mUstr.rfind(s.mUstr, pos);
1581
string::rfind(value_type uc, // IN
1582
size_type pos) // IN/OPT
1585
return mUstr.rfind(uc, pos);
1590
*-----------------------------------------------------------------------------
1592
* utf::string::find_first_of --
1594
* Find the first occurrence of 's' in this string. 'i' determines where in
1595
* the current string we start searching for 's'
1598
* If s is found, then, it returns the index where s occurs in this
1600
* Otherwise, returns npos.
1605
*-----------------------------------------------------------------------------
1609
string::find_first_of(const string &s, // IN
1610
size_type i) // IN/OPT
1613
return mUstr.find_first_of(s.mUstr, i);
1618
string::find_first_of(value_type uc, // IN
1619
size_type i) // IN/OPT
1622
return mUstr.find_first_of(uc, i);
1627
*-----------------------------------------------------------------------------
1629
* utf::string::find_first_not_of --
1631
* Find the first character in this string that is NOT in 's'. 'i'
1632
* determines where in this string we start searching.
1635
* Returns the index of the first sequence in this string that is not 's'
1636
* Otherwise, returns npos.
1641
*-----------------------------------------------------------------------------
1645
string::find_first_not_of(const string &s, // IN
1646
size_type i) // IN/OPT
1649
return mUstr.find_first_not_of(s.mUstr, i);
1654
string::find_first_not_of(value_type uc, // IN
1655
size_type i) // IN/OPT
1658
return mUstr.find_first_not_of(uc, i);
1663
*-----------------------------------------------------------------------------
1665
* utf::string::find_last_of --
1667
* Does a reverse search in this string for 's'. 'i' determines where we
1668
* start the search for in this string.
1671
* If s is found, then, it returns the index where s occurs in this
1673
* Otherwise, returns npos.
1678
*-----------------------------------------------------------------------------
1682
string::find_last_of(const string &s, // IN
1683
size_type i) // IN/OPT
1686
return mUstr.find_last_of(s.mUstr, i);
1691
string::find_last_of(value_type uc, // IN
1692
size_type i) // IN/OPT
1695
return mUstr.find_last_of(uc, i);
1700
*-----------------------------------------------------------------------------
1702
* utf::string::find_last_not_of --
1704
* Searches for the last character within the current string that does
1705
* not match any characters in 's'. 'i' determines where we start the
1706
* search for in this string. (moving backwards).
1709
* If NOT 's' is found, then, it returns the index where s does not occur
1711
* Otherwise, returns npos.
1716
*-----------------------------------------------------------------------------
1720
string::find_last_not_of(const string &s, // IN
1721
size_type i) // IN/OPT
1724
return mUstr.find_last_not_of(s.mUstr, i);
1729
string::find_last_not_of(value_type uc, // IN
1730
size_type i) // IN/OPT
1733
return mUstr.find_last_not_of(uc, i);
1738
*-----------------------------------------------------------------------------
1740
* utf::string::substr --
1742
* Create a substring of this string with given range.
1745
* The newly created string.
1750
*-----------------------------------------------------------------------------
1754
string::substr(size_type start, // IN
1755
size_type len) // IN
1758
return string(mUstr.substr(start, len));
1763
*-----------------------------------------------------------------------------
1765
* utf::string::operator[] --
1767
* Get the UTF-32 character at given index in this string.
1770
* UTF-32 character (gunichar).
1775
*-----------------------------------------------------------------------------
1779
string::operator[](size_type i) // IN
1787
*-----------------------------------------------------------------------------
1789
* utf::string::startsWith --
1791
* Tests if the current string starts with 's'
1794
* true if current string starts with 's', false otherwise
1799
*-----------------------------------------------------------------------------
1803
string::startsWith(const string &s, // IN
1804
bool ignoreCase) // IN/OPT: false by default
1807
return UnicodeStartsWith(c_str(), s.c_str(), ignoreCase);
1812
*-----------------------------------------------------------------------------
1814
* utf::string::endsWith --
1816
* Tests if the current string ends with 's'
1819
* true if current string ends with 's', false otherwise
1824
*-----------------------------------------------------------------------------
1828
string::endsWith(const string &s, // IN
1829
bool ignoreCase) // IN/OPT: false by default
1832
return UnicodeEndsWith(c_str(), s.c_str(), ignoreCase);
1837
*-----------------------------------------------------------------------------
1839
* utf::string::split --
1841
* Return a vector of utf::strings. The vector contains the elements of
1842
* the string split by the passed in separator. Empty tokens are not
1845
* "1,2,3".split(",") -> ["1", "2", "3"]
1846
* "1,,".split(",") -> ["1", "", ""]
1847
* "1".split(",") -> ["1"]
1849
* XXX If this is to be used for things like command line parsing, support
1850
* for quoted strings needs to be added.
1853
* A vector of utf::strings
1858
*-----------------------------------------------------------------------------
1862
string::split(const string &sep) // IN
1865
std::vector<string> splitStrings;
1866
size_type sIndex = 0;
1867
size_type sepLen = sep.length();
1872
size_type index = find(sep, sIndex);
1873
if (index == npos) {
1874
splitStrings.push_back(substr(sIndex));
1878
splitStrings.push_back(substr(sIndex, index - sIndex));
1879
sIndex = index + sepLen;
1882
return splitStrings;
1887
*-----------------------------------------------------------------------------
1889
* utf::string::GetUtf16Cache --
1891
* Return the UTF-16 representation of the current string. This value is
1892
* cached in the object. If the cache is not valid (NULL), then create
1896
* A UTF-16 representation of the current string
1899
* Allocates a UTF16 string
1901
*-----------------------------------------------------------------------------
1905
string::GetUtf16Cache()
1908
if (mUtf16Cache == NULL) {
1909
mUtf16Cache = Unicode_GetAllocUTF16(c_str());
1917
*-----------------------------------------------------------------------------
1919
* utf::string::InvalidateCache --
1921
* Frees the cache in this string.
1929
*-----------------------------------------------------------------------------
1933
string::InvalidateCache()
1937
mUtf16Length = npos;
1942
*-----------------------------------------------------------------------------
1944
* utf::string::operator+ --
1946
* Create a new string by appending the input string to this string.
1948
* NOTE: This is not the same as append. append() will modify the
1949
* current object, while this will return a new object.
1952
* The newly created string.
1957
*-----------------------------------------------------------------------------
1961
string::operator+(const string &rhs) // IN
1964
return mUstr + rhs.mUstr;
1969
string::operator+(value_type uc) // IN
1977
*-----------------------------------------------------------------------------
1979
* utf::string::operator== --
1981
* Equality operator for string objects
1984
* true or false (true if equal)
1989
*-----------------------------------------------------------------------------
1993
string::operator==(const string &rhs) // IN
1996
return compare(rhs) == 0;
2001
*-----------------------------------------------------------------------------
2003
* utf::string::operator!= --
2005
* Inequality operator for string objects
2008
* true or false (true if not equal)
2013
*-----------------------------------------------------------------------------
2017
string::operator!=(const string &rhs) // IN
2020
return compare(rhs) != 0;
2025
*-----------------------------------------------------------------------------
2027
* utf::string::operator< --
2029
* Less than operator for string objects
2032
* true or false (true if lhs is < rhs)
2037
*-----------------------------------------------------------------------------
2041
string::operator<(const string &rhs) // IN
2044
return compare(rhs) < 0;
2049
*-----------------------------------------------------------------------------
2051
* utf::string::operator> --
2053
* Greater than operator for string objects
2056
* true or false (true if lhs is > rhs)
2061
*-----------------------------------------------------------------------------
2065
string::operator>(const string &rhs) // IN
2068
return compare(rhs) > 0;
2073
*-----------------------------------------------------------------------------
2075
* utf::string::operator<= --
2077
* Less than or equal to operator for string objects
2080
* true or false (true if lhs is <= rhs)
2085
*-----------------------------------------------------------------------------
2089
string::operator<=(const string &rhs) // IN
2092
return compare(rhs) <= 0;
2097
*-----------------------------------------------------------------------------
2099
* utf::string::operator>= --
2101
* Greater than or equal to operator for string objects
2104
* true or false (true if lhs is >= rhs)
2109
*-----------------------------------------------------------------------------
2113
string::operator>=(const string &rhs) // IN
2116
return compare(rhs) >= 0;
2121
*-----------------------------------------------------------------------------
2123
* utf::string::begin --
2125
* Returns an iterator to the start of the string.
2133
*-----------------------------------------------------------------------------
2139
return mUstr.begin();
2143
string::const_iterator
2147
return mUstr.begin();
2152
*-----------------------------------------------------------------------------
2154
* utf::string::end --
2156
* Returns an iterator to the end of the string.
2164
*-----------------------------------------------------------------------------
2174
string::const_iterator
2183
*-----------------------------------------------------------------------------
2187
* Validates the string.
2190
* true if the string is valid UTF-8, false otherwise.
2195
*-----------------------------------------------------------------------------
2199
Validate(const Glib::ustring& s) // IN
2201
bool isValid = s.validate();
2203
char *escaped = Unicode_EscapeBuffer(s.c_str(), -1, STRING_ENCODING_UTF8);
2204
Warning("Invalid UTF-8 string: \"%s\"\n", escaped);
2212
*-----------------------------------------------------------------------------
2214
* utf::CreateWithLength --
2216
* A wrapper function for Unicode_AllocWithLength() that returns a utf::string.
2219
* A utf::string created with given parameters.
2224
*-----------------------------------------------------------------------------
2228
CreateWithLength(const void *buffer, // IN
2229
ssize_t lengthInBytes, // IN: NUL not included
2230
StringEncoding encoding) // IN
2232
if (!Unicode_IsBufferValid(buffer, lengthInBytes, encoding)) {
2233
throw ConversionError();
2236
Unicode utf8 = Unicode_AllocWithLength(buffer, lengthInBytes, encoding);
2239
string result(utf8);
2250
*----------------------------------------------------------------------
2252
* utf::CreateWithBOMBuffer --
2254
* Convert a text buffer with BOM (byte-order mark) to utf::string.
2255
* If BOM not present, assume it's UTF-8.
2258
* A utf::string containing the text buffer.
2263
*----------------------------------------------------------------------
2267
CreateWithBOMBuffer(const void *buffer, // IN
2268
ssize_t lengthInBytes) // IN: NUL not included
2271
uint8 bom[4]; // BOM with max size.
2272
ssize_t len; // Length of BOM.
2273
StringEncoding encoding; // Encoding if a BOM is present.
2276
static const BOMMap mapBOM[] = {
2277
{{0}, 0, STRING_ENCODING_UTF8 }, // Default encoding.
2278
{{0xEF, 0xBB, 0xBF}, 3, STRING_ENCODING_UTF8 },
2279
{{0xFE, 0xFF}, 2, STRING_ENCODING_UTF16_BE },
2280
{{0xFF, 0xFE}, 2, STRING_ENCODING_UTF16_LE },
2281
{{0x00, 0x00, 0xFE, 0xFF}, 4, STRING_ENCODING_UTF32_BE },
2282
{{0xFF, 0xFE, 0x00, 0x00}, 4, STRING_ENCODING_UTF32_LE }
2285
ASSERT(lengthInBytes >= 0);
2286
unsigned int index = 0; // Default encoding, no need to check.
2287
for (unsigned int i = 1; i < ARRAYSIZE(mapBOM); i++) {
2288
if ( lengthInBytes >= mapBOM[i].len
2289
&& memcmp(mapBOM[i].bom, buffer, mapBOM[i].len) == 0) {
2295
return CreateWithLength(reinterpret_cast<const char*>(buffer) + mapBOM[index].len,
2296
lengthInBytes - mapBOM[index].len,
2297
mapBOM[index].encoding);
2302
*-----------------------------------------------------------------------------
2306
* Converts an integer to a utf::string.
2309
* A utf::string created with the given integer.
2314
*-----------------------------------------------------------------------------
2318
IntToStr(int64 val) // IN
2320
std::ostringstream ostream;
2322
return ostream.str().c_str();
2327
/*
 *-----------------------------------------------------------------------------
 *
 * utf::CopyArray --
 *
 *      Copies an array to a vector.
 *      Guaranteed to not shrink the vector: it is grown to n elements when
 *      it is smaller, and left at its current size otherwise.
 *
 * Results:
 *      A vector containing a shallow (memcpy) copy of the array in its
 *      first n elements.
 *
 * Side effects:
 *      None
 *
 *-----------------------------------------------------------------------------
 */

template<typename T>
static void
CopyArray(const T *p,          // IN:
          size_t n,            // IN: The number of array elements to copy.
          std::vector<T>& buf) // OUT:
{
   // Grow only; a destination that is already large enough keeps its size.
   if (buf.size() < n) {
      buf.resize(n);
   }

   // &buf[0] is only meaningful when the vector has at least one element.
   if (buf.empty()) {
      return;
   }

   ASSERT(p != NULL);
   memcpy(&buf[0], p, n * sizeof(T));
}
2361
*-----------------------------------------------------------------------------
2363
* utf::CreateWritableBuffer --
2365
* Copies a utf::string to a writable buffer.
2366
* Guaranteed to never shrink the size of the destination buffer.
2369
* A std::vector containing the NUL-terminated string data.
2370
* The size of the resulting vector may exceed the length of the
2371
* NUL-terminated string.
2376
*-----------------------------------------------------------------------------
2380
CreateWritableBuffer(const string& s, // IN:
2381
std::vector<char>& buf) // OUT: A copy of the string, as UTF-8.
2383
CopyArray(s.c_str(), s.bytes() + 1, buf);
2388
CreateWritableBuffer(const string& s, // IN:
2389
std::vector<utf16_t>& buf) // OUT: A copy of the string, as UTF-16.
2391
CopyArray(s.w_str(), s.w_size() + 1, buf);