1
/*********************************************************
2
* Copyright (C) 2008 VMware, Inc. All rights reserved.
4
* This program is free software; you can redistribute it and/or modify it
5
* under the terms of the GNU Lesser General Public License as published
6
* by the Free Software Foundation version 2.1 and no later version.
8
* This program is distributed in the hope that it will be useful, but
9
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
10
* or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public
11
* License for more details.
13
* You should have received a copy of the GNU Lesser General Public License
14
* along with this program; if not, write to the Free Software Foundation, Inc.,
15
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
17
*********************************************************/
22
* A string wrapper for bora/lib/unicode. This class is intended to provide
23
* more c++ features such as operator overloading, automatic string conversion
24
* between different types of string classes.
38
* Initialize static scope variables.
40
* Note that with the way this is done, it's important not to delay load glib
41
* libraries. See bug 397373 for more details. If you're getting crazy values
42
* for utf::string::npos, check your linker flags.
44
const string::size_type string::npos = Glib::ustring::npos;
48
*-----------------------------------------------------------------------------
50
* utf::string::string --
60
*-----------------------------------------------------------------------------
72
*-----------------------------------------------------------------------------
74
* utf::string::string --
84
*-----------------------------------------------------------------------------
87
string::string(ConstUnicode s) // IN
93
mUstr = Unicode_GetUTF8(s);
94
ASSERT(Validate(mUstr));
101
*-----------------------------------------------------------------------------
103
* utf::string::init_bstr_t --
105
* Utility function to construct from a _bstr_t object.
106
* Copies the UTF-16 representation of the _bstr_t.
112
* Makes a copy of the _bstr_t data and frees that data when
113
* the utf::string is destroyed.
118
*-----------------------------------------------------------------------------
122
string::init_bstr_t(const _bstr_t &s) // IN
124
// If the input is empty, then there's nothing to do.
125
if (s.length() == 0) {
129
Unicode utf8 = Unicode_AllocWithUTF16(static_cast<const utf16_t *>(s));
139
ASSERT(Validate(mUstr));
144
*-----------------------------------------------------------------------------
146
* utf::string::string --
148
* Constructor from a ubstr_t object. Copies the UTF-16 representation of
155
* Makes a copy of the ubstr_t data and frees that data when the
156
* utf::string is destroyed.
161
*-----------------------------------------------------------------------------
164
string::string(const ubstr_t &s) // IN
169
// If the input is empty, then there's nothing to do.
170
if (s.length() == 0) {
174
mUstr = static_cast<const char *>(s);
175
ASSERT(Validate(mUstr));
180
*-----------------------------------------------------------------------------
182
* utf::string::string --
184
* Constructor from a _bstr_t object. Copies the UTF-16 representation of
185
* the _bstr_t. Needed for dealing with _com_error::Description().
191
* Makes a copy of the _bstr_t data and frees that data when
192
* the utf::string is destroyed.
197
*-----------------------------------------------------------------------------
200
string::string(const _bstr_t &s) // IN
210
*-----------------------------------------------------------------------------
212
* utf::string::string --
214
* Constructor from a uvariant_t object. Copies the UTF-16 representation
215
* of the ubstr_t interface.
221
* Makes a copy of the uvariant_t data and frees that data when the
222
* utf::string is destroyed.
227
*-----------------------------------------------------------------------------
230
string::string(const uvariant_t &v) // IN
240
Warning("Invalid uvariant_t to ubstr_t conversion.\n");
244
// If the input is empty, then there's nothing to do.
245
if (s.length() == 0) {
249
mUstr = static_cast<const char *>(s);
250
ASSERT(Validate(mUstr));
255
*-----------------------------------------------------------------------------
257
* utf::string::string --
259
* Constructor from a _variant_t object. Copies the UTF-16 representation
266
* Makes a copy of the _variant_t data and frees that data when
267
* the utf::string is destroyed.
272
*-----------------------------------------------------------------------------
275
string::string(const _variant_t &v) // IN
285
Warning("Invalid _variant_t to _bstr_t conversion.\n");
296
*-----------------------------------------------------------------------------
298
* utf::string::string --
308
*-----------------------------------------------------------------------------
311
string::string(const utf16string &s) // IN
316
// If the input is empty, then there's nothing to do.
321
string copy(s.c_str());
327
*-----------------------------------------------------------------------------
329
* utf::string::string --
339
*-----------------------------------------------------------------------------
342
string::string(const utf16_t *s) // IN
350
* Since we already have a UTF-16 representation of the string, copy it
353
mUtf16Cache = Unicode_UTF16Strdup(s);
355
Unicode utf8 = Unicode_AllocWithUTF16(s);
365
ASSERT(Validate(mUstr));
370
*-----------------------------------------------------------------------------
372
* utf::string::string --
382
*-----------------------------------------------------------------------------
385
string::string(const char *s, // IN
386
StringEncoding encoding) // IN
393
Unicode utf8 = Unicode_Alloc(s, encoding);
403
ASSERT(Validate(mUstr));
408
*-----------------------------------------------------------------------------
410
* utf::string::string --
414
* XXX: When initializing mUstr, we do a deep copy of the string data
415
* instead of just calling mUstr(s). This is because Glib::ustring is very
416
* smart about sharing storage, and zero_clear is very dumb. Once we get
417
* rid of zero_clear and have a separate sensitive-string class, this can
418
* go back to being simple.
426
*-----------------------------------------------------------------------------
429
string::string(const Glib::ustring &s) // IN
439
*-----------------------------------------------------------------------------
441
* utf::string::string --
445
* XXX: When initializing mUstr, we do a deep copy of the string data
446
* instead of just calling mUstr(s). This is because Glib::ustring is very
447
* smart about sharing storage, and zero_clear is very dumb. Once we get
448
* rid of zero_clear and have a separate sensitive-string class, this can
449
* go back to being simple.
457
*-----------------------------------------------------------------------------
460
string::string(const string &s) // IN
461
: mUstr(s.mUstr.c_str()),
469
*-----------------------------------------------------------------------------
471
* utf::string::~string --
481
*-----------------------------------------------------------------------------
491
*-----------------------------------------------------------------------------
493
* utf::string::operator Glib::ustring --
495
* Implicit conversion to Glib::ustring operator
498
* The internal Glib::ustring object.
503
*-----------------------------------------------------------------------------
506
string::operator const Glib::ustring& ()
516
*-----------------------------------------------------------------------------
518
* utf::string::operator ubstr_t --
520
* Implicit conversion to ubstr_t
523
* The current ubstr_t string. NUL-terminated.
529
* This function is only defined in _WIN32
531
*-----------------------------------------------------------------------------
534
string::operator const ubstr_t()
537
return ubstr_t(GetUtf16Cache());
544
*-----------------------------------------------------------------------------
546
* utf::string::operator= --
548
* Assignment operator.
551
* A reference to this string.
556
*-----------------------------------------------------------------------------
560
string::operator=(string copy) // IN
568
*-----------------------------------------------------------------------------
570
* utf::string::operator+= --
572
* Append operator of the utf::string class.
575
* A reference to this string.
580
*-----------------------------------------------------------------------------
584
string::operator+=(const string &s) // IN
591
string::operator+=(value_type uc) // IN
599
*-----------------------------------------------------------------------------
601
* utf::string::swap --
603
* Swaps the contents with a given utf::string.
611
*-----------------------------------------------------------------------------
615
string::swap(string &s) // IN/OUT
618
std::swap(mUtf16Cache, s.mUtf16Cache);
619
std::swap(mUtf16Length, s.mUtf16Length);
624
*-----------------------------------------------------------------------------
626
* utf::string::resize --
628
* Change the size of this utf::string.
636
*-----------------------------------------------------------------------------
640
string::resize(size_type n, // IN
641
value_type c) // IN/OPT
649
*-----------------------------------------------------------------------------
651
* utf::string::c_str --
653
* Get the UTF-8 representation of this string.
656
* The current string with UTF-8 encoding. NUL-terminated.
661
*-----------------------------------------------------------------------------
668
return mUstr.c_str();
673
*-----------------------------------------------------------------------------
675
* utf::string::w_str --
677
* Get the UTF-16 representation of this string.
680
* The current string with UTF-16 (host-endian) encoding. NUL-terminated.
685
*-----------------------------------------------------------------------------
692
return GetUtf16Cache();
697
*-----------------------------------------------------------------------------
699
* utf::string::ustr --
701
* Get the Glib::ustring backing of this string.
704
* The internal Glib::ustring object.
709
*-----------------------------------------------------------------------------
721
*-----------------------------------------------------------------------------
723
* utf::string::empty --
725
* Test if this is an empty string.
728
* true if it's an empty string, otherwise false.
733
*-----------------------------------------------------------------------------
740
return mUstr.empty();
745
*-----------------------------------------------------------------------------
747
* utf::string::size --
750
* Returns the length of this string, in characters (code points),
756
*-----------------------------------------------------------------------------
768
*-----------------------------------------------------------------------------
770
* utf::string::w_size --
773
* Returns the length of this string, in UTF-16 code units,
779
*-----------------------------------------------------------------------------
786
if (mUtf16Length == npos) {
787
mUtf16Length = Unicode_UTF16Strlen(GetUtf16Cache());
795
*-----------------------------------------------------------------------------
797
* utf::string::length --
800
* Returns the length of this string, in characters (code points),
801
* excluding NUL. (Same as size().)
806
*-----------------------------------------------------------------------------
818
*-----------------------------------------------------------------------------
820
* utf::string::bytes --
823
* Returns the number of bytes used by the UTF-8 representation of this
824
* string, excluding NUL.
829
*-----------------------------------------------------------------------------
836
return mUstr.bytes();
841
*-----------------------------------------------------------------------------
843
* utf::string::foldCase --
845
* Returns the case-folded string of this string.
848
* The newly created string.
853
*-----------------------------------------------------------------------------
860
return string(mUstr.casefold());
865
*-----------------------------------------------------------------------------
867
* utf::string::trim --
869
* Returns the whitespace-trimmed version of this string.
872
* The newly created string.
877
*-----------------------------------------------------------------------------
884
Unicode trim = Unicode_Trim(c_str());
892
*-----------------------------------------------------------------------------
894
* utf::string::trimLeft --
896
* Get the left-trimmed version of this string.
899
* The newly created string.
904
*-----------------------------------------------------------------------------
911
Unicode trim = Unicode_TrimLeft(c_str());
919
*-----------------------------------------------------------------------------
921
* utf::string::trimRight --
923
* Get the right-trimmed version of this string.
926
* The newly created string.
931
*-----------------------------------------------------------------------------
938
Unicode trim = Unicode_TrimRight(c_str());
946
*-----------------------------------------------------------------------------
948
* utf::string::normalize --
950
* Creates a new string by normalizing the input string.
953
* The newly created string.
958
*-----------------------------------------------------------------------------
962
string::normalize(NormalizeMode mode) // IN
965
return mUstr.normalize((Glib::NormalizeMode)mode);
969
#ifdef SUPPORT_UNICODE
972
*-----------------------------------------------------------------------------
974
* utf::string::toLower --
976
* Creates a new string by lower-casing the input string using
977
* the rules of the specified locale.
980
* The newly created string.
985
*-----------------------------------------------------------------------------
989
string::toLower(const char *locale) // IN
992
Unicode lower = Unicode_ToLower(c_str(), locale);
993
string results(lower);
1001
*-----------------------------------------------------------------------------
1003
* utf::string::toUpper --
1005
* Creates a new string by upper-casing the input string using
1006
* the rules of the specified locale.
1009
* The newly created string.
1014
*-----------------------------------------------------------------------------
1018
string::toUpper(const char *locale) // IN
1021
Unicode upper = Unicode_ToUpper(c_str(), locale);
1022
string results(upper);
1023
Unicode_Free(upper);
1030
*-----------------------------------------------------------------------------
1032
* utf::string::toTitle --
1034
* Creates a new string by title-casing the input string using
1035
* the rules of the specified locale.
1038
* The newly created string.
1043
*-----------------------------------------------------------------------------
1047
string::toTitle(const char *locale) // IN
1050
Unicode title = Unicode_ToTitle(c_str(), locale);
1051
string results(title);
1052
Unicode_Free(title);
1061
*-----------------------------------------------------------------------------
1063
* utf::string::append --
1065
* Appends the argument string to this utf::string.
1068
* A reference to this object.
1073
*-----------------------------------------------------------------------------
1077
string::append(const string &s) // IN
1080
mUstr.append(s.mUstr);
1087
string::append(const string &s, // IN
1092
mUstr.append(s.mUstr, i, n);
1099
*-----------------------------------------------------------------------------
1101
* utf::string::push_back --
1103
* Appends the character at the end of this string.
1111
*-----------------------------------------------------------------------------
1115
string::push_back(value_type uc) // IN
1118
mUstr.push_back(uc);
1123
*-----------------------------------------------------------------------------
1125
* utf::string::assign --
1127
* Assigns the passed in string to this string.
1129
* Callers should prefer using operator= instead of assign().
1132
* A reference to this object
1137
*-----------------------------------------------------------------------------
1141
string::assign(const string &s) // IN
1143
return operator=(s);
1148
*-----------------------------------------------------------------------------
1150
* utf::string::insert --
1152
* Inserts the argument string to this string at index i, return this
1155
* These are passthrough calls to the Glib::insert calls.
1158
* A reference to this object
1163
*-----------------------------------------------------------------------------
1167
string::insert(size_type i, // IN
1168
const string &s) // IN
1171
mUstr.insert(i, s.mUstr);
1177
string::insert(size_type i, // IN
1182
mUstr.insert(i, n, uc);
1188
*-----------------------------------------------------------------------------
1190
* utf::string::clear --
1192
* Clears this string.
1200
*-----------------------------------------------------------------------------
1212
*-----------------------------------------------------------------------------
1214
* utf::string::zero_clear --
1216
* Zeroes and clears this string.
1218
* XXX: This is temporary until we have a separate string class for
1227
*-----------------------------------------------------------------------------
1231
string::zero_clear()
1233
if (mUtf16Cache != NULL) {
1234
Util_ZeroFree(mUtf16Cache,
1235
Unicode_UTF16Strlen(mUtf16Cache) * sizeof *mUtf16Cache);
1240
* This is a best effort. We aren't guaranteed that Glib::ustring doesn't
1241
* leave behind any internal copies of the string.
1243
if (mUstr.c_str() != mUstr.data()) {
1244
Util_Zero(const_cast<char *>(mUstr.c_str()), mUstr.bytes());
1246
Util_Zero(const_cast<char *>(mUstr.data()), mUstr.bytes());
1252
*-----------------------------------------------------------------------------
1254
* utf::string::erase --
1256
* Erase the contents of this string in the specified index range.
1259
* A reference to this object
1264
*-----------------------------------------------------------------------------
1268
string::erase(size_type i, // IN
1278
*-----------------------------------------------------------------------------
1280
* utf::string::erase --
1282
* Erase the contents of this string with given iterator.
1285
* The current iterator.
1290
*-----------------------------------------------------------------------------
1294
string::erase(iterator p) // IN
1297
return mUstr.erase(p);
1302
string::erase(iterator pbegin, // IN
1303
iterator pend) // IN
1306
return mUstr.erase(pbegin, pend);
1310
*-----------------------------------------------------------------------------
1312
* utf::string::replace --
1314
* Replace the string contents specified by the range, with the passed in
1318
* A reference to this object.
1323
*-----------------------------------------------------------------------------
1327
string::replace(size_type i, // IN
1329
const string &s) // IN
1332
mUstr.replace(i, n, s.mUstr);
1338
*-----------------------------------------------------------------------------
1340
* utf::string::replace --
1342
* Mutates this string by replacing all occurrences of one string with
1346
* A reference to this object.
1351
*-----------------------------------------------------------------------------
1355
string::replace(const string &from, // IN
1356
const string &to) // IN
1359
size_type start = 0;
1360
size_type fromSize = from.length();
1363
while ((end = find(from, start)) != string::npos) {
1364
result += substr(start, end - start);
1367
start = end + fromSize;
1370
if (start < length()) {
1371
result += substr(start);
1380
*-----------------------------------------------------------------------------
1382
* utf::string::replace_copy --
1385
* Returns a new string with all occurrences of one string replaced by
1391
*-----------------------------------------------------------------------------
1395
string::replace_copy(const string& from, // IN
1396
const string& to) // IN
1399
return string(*this).replace(from, to);
1404
*-----------------------------------------------------------------------------
1406
* utf::string::compare --
1408
* A 3-way (output -1, 0, or 1) string comparison. Compares each Unicode
1409
* code point of this string to the argument string.
1412
* -1 if *this < s, 0 if *this == s, 1 if *this > s.
1417
*-----------------------------------------------------------------------------
1421
string::compare(const string &s, // IN
1422
bool ignoreCase) // IN/OPT: false by default
1426
? Unicode_CompareIgnoreCase(c_str(), s.c_str())
1427
: Unicode_Compare(c_str(), s.c_str());
1432
string::compare(size_type i, // IN
1434
const string &s) // IN
1437
return mUstr.compare(i, n, s.mUstr);
1442
*-----------------------------------------------------------------------------
1444
* utf::string::compareLength --
1446
* A 3-way (output -1, 0, or 1) string comparison with given length.
1447
* Compares only the first len characters (in code points) of the strings.
1450
* -1 if *this < s, 0 if *this == s, 1 if *this > s.
1455
*-----------------------------------------------------------------------------
1459
string::compareLength(const string &s, // IN
1460
size_type len, // IN: length in code-point
1461
bool ignoreCase) // IN/OPT: false by default
1464
return substr(0, len).compare(s.substr(0, len), ignoreCase);
1469
*-----------------------------------------------------------------------------
1471
* utf::string::compareRange --
1473
* A 3-way (output -1, 0, or 1) string comparison with given length.
1474
* Compares the substrings from this string [thisStart ~ thisStart + thisLength-1]
1475
* with the input string str [strStart ~ strStart + strLength - 1].
1478
* -1 if *this < s, 0 if *this == s, 1 if *this > s.
1483
*-----------------------------------------------------------------------------
1487
string::compareRange(size_type thisStart, // IN: index in code-point
1488
size_type thisLength, // IN: length in code-point
1489
const string &str, // IN
1490
size_type strStart, // IN: index in code-point
1491
size_type strLength, // IN: length in code-point
1492
bool ignoreCase) // IN/OPT: false by default
1495
return substr(thisStart, thisLength).compare(str.substr(strStart, strLength), ignoreCase);
1500
*-----------------------------------------------------------------------------
1502
* utf::string::find --
1504
* Searches for the first occurrence of the input string inside this string.
1507
* If s is found, then, it returns the first starting index of the input string.
1508
* Otherwise, returns npos.
1513
*-----------------------------------------------------------------------------
1517
string::find(const string &s, // IN
1518
size_type pos) // IN/OPT
1521
return mUstr.find(s.mUstr, pos);
1526
string::find(value_type uc, // IN
1527
size_type pos) // IN/OPT
1530
return mUstr.find(uc, pos);
1535
*-----------------------------------------------------------------------------
1537
* utf::string::rfind --
1539
* Searches for the last occurrence of the input string inside this string.
1542
* If s is found, then, it returns the last starting index of the input string.
1543
* Otherwise, returns npos.
1548
*-----------------------------------------------------------------------------
1552
string::rfind(const string &s, // IN
1553
size_type pos) // IN/OPT
1556
return mUstr.rfind(s.mUstr, pos);
1561
string::rfind(value_type uc, // IN
1562
size_type pos) // IN/OPT
1565
return mUstr.rfind(uc, pos);
1570
*-----------------------------------------------------------------------------
1572
* utf::string::find_first_of --
1574
* Find the first occurrence of 's' in this string. 'i' determines where in
1575
* the current string we start searching for 's'
1578
* If s is found, then, it returns the index where s occurs in this
1580
* Otherwise, returns npos.
1585
*-----------------------------------------------------------------------------
1589
string::find_first_of(const string &s, // IN
1590
size_type i) // IN/OPT
1593
return mUstr.find_first_of(s.mUstr, i);
1598
string::find_first_of(value_type uc, // IN
1599
size_type i) // IN/OPT
1602
return mUstr.find_first_of(uc, i);
1607
*-----------------------------------------------------------------------------
1609
* utf::string::find_first_not_of --
1611
* Find the first occurrence of a string NOT in 's' in this string. 'i'
1612
* determines where in this string we start searching to NOT 's'.
1615
* Returns the index of the first sequence in this string that is not 's'
1616
* Otherwise, returns npos.
1621
*-----------------------------------------------------------------------------
1625
string::find_first_not_of(const string &s, // IN
1626
size_type i) // IN/OPT
1629
return mUstr.find_first_not_of(s.mUstr, i);
1634
string::find_first_not_of(value_type uc, // IN
1635
size_type i) // IN/OPT
1638
return mUstr.find_first_not_of(uc, i);
1643
*-----------------------------------------------------------------------------
1645
* utf::string::find_last_of --
1647
* Does a reverse search in this string for 's'. 'i' determines where we
1648
* start the search for in this string.
1651
* If s is found, then, it returns the index where s occurs in this
1653
* Otherwise, returns npos.
1658
*-----------------------------------------------------------------------------
1662
string::find_last_of(const string &s, // IN
1663
size_type i) // IN/OPT
1666
return mUstr.find_last_of(s.mUstr, i);
1671
string::find_last_of(value_type uc, // IN
1672
size_type i) // IN/OPT
1675
return mUstr.find_last_of(uc, i);
1680
*-----------------------------------------------------------------------------
1682
* utf::string::find_last_not_of --
1684
* Searches for the last character within the current string that does
1685
* not match any characters in 's'. 'i' determines where we start the
1686
* search for in this string. (moving backwards).
1689
* If NOT 's' is found, then, it returns the index where s does not occur
1691
* Otherwise, returns npos.
1696
*-----------------------------------------------------------------------------
1700
string::find_last_not_of(const string &s, // IN
1701
size_type i) // IN/OPT
1704
return mUstr.find_last_not_of(s.mUstr, i);
1709
string::find_last_not_of(value_type uc, // IN
1710
size_type i) // IN/OPT
1713
return mUstr.find_last_not_of(uc, i);
1718
*-----------------------------------------------------------------------------
1720
* utf::string::substr --
1722
* Create a substring of this string with given range.
1725
* The newly created string.
1730
*-----------------------------------------------------------------------------
1734
string::substr(size_type start, // IN
1735
size_type len) // IN
1738
return string(mUstr.substr(start, len));
1743
*-----------------------------------------------------------------------------
1745
* utf::string::operator[] --
1747
* Get the UTF-32 character at given index in this string.
1750
* UTF-32 character (gunichar).
1755
*-----------------------------------------------------------------------------
1759
string::operator[](size_type i) // IN
1767
*-----------------------------------------------------------------------------
1769
* utf::string::startsWith --
1771
* Tests if the current string starts with 's'
1774
* true if current string starts with 's', false otherwise
1779
*-----------------------------------------------------------------------------
1783
string::startsWith(const string &s, // IN
1784
bool ignoreCase) // IN/OPT: false by default
1787
return UnicodeStartsWith(c_str(), s.c_str(), ignoreCase);
1792
*-----------------------------------------------------------------------------
1794
* utf::string::endsWith --
1796
* Tests if the current string ends with 's'
1799
* true if current string ends with 's', false otherwise
1804
*-----------------------------------------------------------------------------
1808
string::endsWith(const string &s, // IN
1809
bool ignoreCase) // IN/OPT: false by default
1812
return UnicodeEndsWith(c_str(), s.c_str(), ignoreCase);
1817
*-----------------------------------------------------------------------------
1819
* utf::string::split --
1821
* Return a vector of utf::strings. The vector contains the elements of
1822
* the string split by the passed in separator. Empty tokens are not
1825
* "1,2,3".split(",") -> ["1", "2", "3"]
1826
* "1,,".split(",") -> ["1", "", ""]
1827
* "1".split(",") -> ["1"]
1829
* XXX If this is to be used for things like command line parsing, support
1830
* for quoted strings needs to be added.
1833
* A vector of utf::strings
1838
*-----------------------------------------------------------------------------
1842
string::split(const string &sep) // IN
1845
std::vector<string> splitStrings;
1846
size_type sIndex = 0;
1847
size_type sepLen = sep.length();
1852
size_type index = find(sep, sIndex);
1853
if (index == npos) {
1854
splitStrings.push_back(substr(sIndex));
1858
splitStrings.push_back(substr(sIndex, index - sIndex));
1859
sIndex = index + sepLen;
1862
return splitStrings;
1867
*-----------------------------------------------------------------------------
1869
* utf::string::GetUtf16Cache --
1871
* Return the UTF-16 representation of the current string, this value is
1872
* cached, in the object. If the cache is not valid (NULL), then create
1876
* A UTF-16 representation of the current string
1879
* Allocates a UTF16 string
1881
*-----------------------------------------------------------------------------
1885
string::GetUtf16Cache()
1888
if (mUtf16Cache == NULL) {
1889
mUtf16Cache = Unicode_GetAllocUTF16(c_str());
1897
*-----------------------------------------------------------------------------
1899
* utf::string::InvalidateCache --
1901
* Frees the cache in this string.
1909
*-----------------------------------------------------------------------------
1913
string::InvalidateCache()
1917
mUtf16Length = npos;
1922
*-----------------------------------------------------------------------------
1924
* utf::string::operator+ --
1926
* Create a new string by appending the input string to this string.
1928
* NOTE: This is not the same as append. append() will modify the
1929
* current object, while this will return a new object.
1932
* The newly created string.
1937
*-----------------------------------------------------------------------------
1941
string::operator+(const string &rhs) // IN
1944
return mUstr + rhs.mUstr;
1949
string::operator+(value_type uc) // IN
1957
*-----------------------------------------------------------------------------
1959
* utf::string::operator== --
1961
* Equality operator for string objects
1964
* true or false (true if equal)
1969
*-----------------------------------------------------------------------------
1973
string::operator==(const string &rhs) // IN
1976
return compare(rhs) == 0;
1981
*-----------------------------------------------------------------------------
1983
* utf::string::operator!= --
1985
* Inequality operator for string objects
1988
* true or false (true if not equal)
1993
*-----------------------------------------------------------------------------
1997
string::operator!=(const string &rhs) // IN
2000
return compare(rhs) != 0;
2005
*-----------------------------------------------------------------------------
2007
* utf::string::operator< --
2009
* Less than operator for string objects
2012
* true or false (true if lhs is < rhs)
2017
*-----------------------------------------------------------------------------
2021
string::operator<(const string &rhs) // IN
2024
return compare(rhs) < 0;
2029
*-----------------------------------------------------------------------------
2031
* utf::string::operator> --
2033
* Greater than operator for string objects
2036
* true or false (true if lhs is > rhs)
2041
*-----------------------------------------------------------------------------
2045
string::operator>(const string &rhs) // IN
2048
return compare(rhs) > 0;
2053
*-----------------------------------------------------------------------------
2055
* utf::string::operator<= --
2057
* Less than or equal to operator for string objects
2060
* true or false (true if lhs is <= rhs)
2065
*-----------------------------------------------------------------------------
2069
string::operator<=(const string &rhs) // IN
2072
return compare(rhs) <= 0;
2077
*-----------------------------------------------------------------------------
2079
* utf::string::operator>= --
2081
* Greater than or equal to operator for string objects
2084
* true or false (true if lhs is >= rhs)
2089
*-----------------------------------------------------------------------------
2093
string::operator>=(const string &rhs) // IN
2096
return compare(rhs) >= 0;
2101
*-----------------------------------------------------------------------------
2103
* utf::string::begin --
2105
* Returns an iterator to the start of the string.
2113
*-----------------------------------------------------------------------------
2119
return mUstr.begin();
2123
string::const_iterator
2127
return mUstr.begin();
2132
*-----------------------------------------------------------------------------
2134
* utf::string::end --
2136
* Returns an iterator to the end of the string.
2144
*-----------------------------------------------------------------------------
2154
string::const_iterator
2163
*-----------------------------------------------------------------------------
2167
* Validates the string.
2170
* true if the string is valid UTF-8, false otherwise.
2175
*-----------------------------------------------------------------------------
2179
Validate(const Glib::ustring& s) // IN
2181
bool isValid = s.validate();
2183
char *escaped = Unicode_EscapeBuffer(s.c_str(), -1, STRING_ENCODING_UTF8);
2184
Warning("Invalid UTF-8 string: \"%s\"\n", escaped);
2192
*-----------------------------------------------------------------------------
2194
* utf::CreateWithLength --
2196
* A wrapper function for Unicode_AllocWithLength() that returns a utf::string.
2199
* A utf::string created with given parameters.
2204
*-----------------------------------------------------------------------------
2208
CreateWithLength(const void *buffer, // IN
2209
ssize_t lengthInBytes, // IN: NUL not included
2210
StringEncoding encoding) // IN
2212
if (!Unicode_IsBufferValid(buffer, lengthInBytes, encoding)) {
2213
throw ConversionError();
2216
Unicode utf8 = Unicode_AllocWithLength(buffer, lengthInBytes, encoding);
2219
string result(utf8);
2230
*----------------------------------------------------------------------
2232
* utf::CreateWithBOMBuffer --
2234
* Convert a text buffer with BOM (byte-order mark) to utf::string.
2235
* If BOM not present, assume it's UTF-8.
2238
* A utf::string containing the text buffer.
2243
*----------------------------------------------------------------------
2247
CreateWithBOMBuffer(const void *buffer, // IN
2248
ssize_t lengthInBytes) // IN: NUL not included
2251
uint8 bom[4]; // BOM with max size.
2252
ssize_t len; // Length of BOM.
2253
StringEncoding encoding; // Encoding if a BOM is present.
2256
static const BOMMap mapBOM[] = {
2257
{{0}, 0, STRING_ENCODING_UTF8 }, // Default encoding.
2258
{{0xEF, 0xBB, 0xBF}, 3, STRING_ENCODING_UTF8 },
2259
{{0xFE, 0xFF}, 2, STRING_ENCODING_UTF16_BE },
2260
{{0xFF, 0xFE}, 2, STRING_ENCODING_UTF16_LE },
2261
{{0x00, 0x00, 0xFE, 0xFF}, 4, STRING_ENCODING_UTF32_BE },
2262
{{0xFF, 0xFE, 0x00, 0x00}, 4, STRING_ENCODING_UTF32_LE }
2265
ASSERT(lengthInBytes >= 0);
2266
unsigned int index = 0; // Default encoding, no need to check.
2267
for (unsigned int i = 1; i < ARRAYSIZE(mapBOM); i++) {
2268
if ( lengthInBytes >= mapBOM[i].len
2269
&& memcmp(mapBOM[i].bom, buffer, mapBOM[i].len) == 0) {
2275
return CreateWithLength(reinterpret_cast<const char*>(buffer) + mapBOM[index].len,
2276
lengthInBytes - mapBOM[index].len,
2277
mapBOM[index].encoding);
2282
*-----------------------------------------------------------------------------
2286
* Converts an integer to a utf::string.
2289
* A utf::string created with the given integer.
2294
*-----------------------------------------------------------------------------
2298
IntToStr(int64 val) // IN
2300
std::ostringstream ostream;
2302
return ostream.str().c_str();
2307
/*
 *-----------------------------------------------------------------------------
 *
 * utf::CopyArray --
 *
 *      Copies an array to a vector. Any previous contents of the vector are
 *      discarded and it is resized to exactly n elements.
 *
 * Results:
 *      A vector containing a shallow copy of the array (elements are copied
 *      by value; nothing they point to is duplicated).
 *
 * Side effects:
 *      None
 *
 *-----------------------------------------------------------------------------
 */

template<typename T>
static void
CopyArray(const T *p,          // IN:
          size_t n,            // IN: The number of array elements to copy.
          std::vector<T>& buf) // OUT:
{
   /*
    * assign() replaces the contents in one step and is well defined for
    * n == 0, so an empty vector is never indexed.
    */
   buf.assign(p, p + n);
}
*-----------------------------------------------------------------------------
2339
* utf::CreateWritableBuffer --
2341
* Copies a utf::string to a writable buffer.
2344
* A std::vector containing the string data.
2349
*-----------------------------------------------------------------------------
2353
CreateWritableBuffer(const string& s, // IN:
2354
std::vector<char>& buf) // OUT: A copy of the string, as UTF-8.
2356
CopyArray(s.c_str(), s.bytes() + 1, buf);
2361
CreateWritableBuffer(const string& s, // IN:
2362
std::vector<utf16_t>& buf) // OUT: A copy of the string, as UTF-16.
2364
CopyArray(s.w_str(), s.w_size() + 1, buf);