2
* Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
4
* This program is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU General Public License as
6
* published by the Free Software Foundation; version 2 of the
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
20
#include "base/string_utilities.h"
21
#include "base/file_functions.h"
31
#include <boost/function.hpp>
32
#include <boost/bind.hpp>
34
DEFAULT_LOG_DOMAIN(DOMAIN_BASE);
42
//--------------------------------------------------------------------------------------------------
45
* Converts an UTF-8 encoded string to an UTF-16 string.
47
std::wstring string_to_wstring(const std::string &s)
51
required= MultiByteToWideChar(CP_UTF8, 0, s.data(), -1, NULL, 0);
53
return std::wstring();
55
// Required contains the length for the result string including the terminating 0.
56
WCHAR *buffer = g_new(WCHAR, required);
57
MultiByteToWideChar(CP_UTF8, 0, s.data(), -1, buffer, required);
58
std::wstring converted(buffer);
64
//--------------------------------------------------------------------------------------------------
67
* Converts an UTF-16 encoded string to an UTF-8 string.
69
std::string wstring_to_string(const std::wstring &s)
72
int required = WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, NULL, 0, NULL, NULL);
73
converted = (char*) g_malloc(required);
74
WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, converted, required, NULL, NULL);
75
std::string result = converted;
81
// Returns the given UTF-8 path converted with string_to_wstring().
std::wstring path_from_utf8(const std::string &s)
83
return string_to_wstring(s);
86
// NOTE(review): second definition that differs only in its return type. Presumably the two are
// selected by a platform #ifdef that is not visible in this listing - confirm against the full file.
std::string path_from_utf8(const std::string &s)
93
//--------------------------------------------------------------------------------------------------
95
// NOTE(review): this listing is missing lines of the function (braces, declarations, error
// handling, cleanup). The comments below only describe the statements that are visible.
std::string string_to_path_for_open(const std::string &s)
98
std::wstring ws = string_to_wstring(s);
99
// Called with no buffer first, to obtain the buffer length to allocate.
int buflen = GetShortPathNameW(ws.c_str(), NULL, 0);
102
wchar_t *buffer = g_new(wchar_t, buflen);
103
if (GetShortPathNameW(ws.c_str(), buffer, buflen) > 0)
106
// Same two-step pattern for the conversion back to UTF-8: query the size, then convert.
buflen = WideCharToMultiByte(CP_UTF8, 0, buffer, buflen, NULL, 0, 0, 0);
107
buffer2 = g_new(char, buflen);
108
if (WideCharToMultiByte(CP_UTF8, 0, buffer, buflen, buffer2, buflen, 0, 0) == 0)
110
// NOTE(review): relies on buffer2 being 0-terminated - confirm.
std::string path(buffer2);
125
//--------------------------------------------------------------------------------------------------
/**
 * Tells whether the given character is one of those which must not appear in a file name.
 * Only the low 8 bits of ch take part in the comparison.
 */
inline bool is_invalid_filesystem_char(int ch)
{
  static const char forbidden[] = "/?<>\\:*|\"^";
  const unsigned char wanted = (unsigned char) ch;

  // The terminating 0 of the literal is intentionally not part of the search range.
  for (size_t i = 0; i < sizeof(forbidden) - 1; ++i)
  {
    if ((unsigned char) forbidden[i] == wanted)
      return true;
  }
  return false;
}
134
// Builds a copy of s that is meant to be usable as a file name.
// NOTE(review): this listing is missing lines (end of the name table, declaration of "out",
// loop bodies, return). Confirm against the full file before changing the code.
std::string sanitize_file_name(const std::string &s)
136
// Names that get special treatment below (compared case sensitively with strcmp).
static const char *invalid_filenames[] = {
137
"com1", "com2", "com3", "com4", "com5", "com6",
138
"com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4",
139
"lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "con", "nul", "prn",
145
for (std::string::const_iterator c = s.begin(); c != s.end(); ++c)
147
// utf-8 has the high-bit = 1, so we just copy those verbatim
// NOTE(review): isalnum()/ispunct() receive a plain char. For bytes >= 128 on platforms with
// a signed char that is a negative value, which these functions do not accept - consider
// testing the high bit first or casting to unsigned char.
148
if (isalnum(*c) || (unsigned char)*c >= 128 || (ispunct(*c) && !is_invalid_filesystem_char(*c)))
154
// not valid under windows
// A trailing space or dot is replaced by an underscore.
155
if (!out.empty() && (out[out.size()-1] == ' ' || out[out.size()-1] == '.'))
156
out[out.size()-1] = '_';
158
// The table is walked until its terminating null pointer entry.
for (const char **fn = invalid_filenames; *fn; ++fn)
160
if (strcmp(out.c_str(), *fn) == 0)
170
//--------------------------------------------------------------------------------------------------
/**
 * Returns a copy of s without any trailing characters that are contained in t.
 * The result is empty if s consists only of such characters.
 */
std::string trim_right(const std::string& s, const std::string& t)
{
  const std::string::size_type last_kept = s.find_last_not_of(t);
  if (last_kept == std::string::npos)
    return std::string();

  return s.substr(0, last_kept + 1);
}
182
//--------------------------------------------------------------------------------------------------
/**
 * Returns a copy of s without any leading characters that are contained in t.
 * The result is empty if s consists only of such characters.
 */
std::string trim_left(const std::string& s, const std::string& t)
{
  const std::string::size_type first_kept = s.find_first_not_of(t);
  if (first_kept == std::string::npos)
    return std::string();

  return s.substr(first_kept);
}
190
//--------------------------------------------------------------------------------------------------
/**
 * Returns a copy of s without leading and trailing characters that are contained in t.
 * The result is empty if s consists only of such characters.
 */
std::string trim(const std::string& s, const std::string& t)
{
  const std::string::size_type last_kept = s.find_last_not_of(t);
  if (last_kept == std::string::npos)
    return std::string();

  // A last kept character exists, so a first one exists as well.
  const std::string::size_type first_kept = s.find_first_not_of(t);
  return s.substr(first_kept, last_kept - first_kept + 1);
}
198
//--------------------------------------------------------------------------------------------------
201
* Simple case conversion routine, which returns a new string.
203
string tolower(const string& s)
205
char *str_down= g_utf8_strdown(s.c_str(), g_utf8_strlen(s.c_str(), -1));
206
std::string result(str_down);
211
//--------------------------------------------------------------------------------------------------
213
string toupper(const string& s)
215
char *str_up= g_utf8_strup(s.c_str(), g_utf8_strlen(s.c_str(), -1));
216
std::string result(str_up);
221
//--------------------------------------------------------------------------------------------------
223
// Shortens s if it is longer than max_length bytes; the visible code appends "..." to the
// shortened text.
// NOTE(review): this listing is missing lines (braces, a possible check of "prev", the return
// statements). Confirm against the full file before changing the code.
// NOTE(review): for max_length <= 0 the pointer passed as second argument below lies before the
// start of the buffer - check whether callers can pass such values.
std::string truncate_text(const std::string& s, int max_length)
225
if ((int) s.length() > max_length)
227
std::string shortened(s.substr(0, max_length));
228
// Step back to a character boundary so that no UTF-8 sequence is cut in the middle.
const char *prev = g_utf8_find_prev_char(shortened.c_str(), shortened.c_str() + (max_length - 1));
231
shortened.resize(prev - shortened.c_str(), 0);
232
shortened.append("...");
239
//--------------------------------------------------------------------------------------------------
241
std::string sanitize_utf8(const std::string& s)
244
if (!g_utf8_validate(s.data(), s.size(), &end))
245
return std::string(s.data(), end);
249
//--------------------------------------------------------------------------------------------------
251
// Splits s at each occurrence of sep and returns the pieces. The visible loop condition stops
// splitting once count has reached 0.
// NOTE(review): this listing is missing lines (initialisation of "ss" and "p", the handling of
// count inside the loop, the final push_back/return). Confirm against the full file.
// NOTE(review): check how an empty sep behaves - find() of an empty string always succeeds.
std::vector<std::string> split(const std::string &s, const std::string &sep, int count)
253
std::vector<std::string> parts;
256
std::string::size_type p;
265
while (!ss.empty() && p != std::string::npos && (count < 0 || count > 0))
267
// Store everything before the separator and continue with the text after it.
parts.push_back(ss.substr(0, p));
268
ss= ss.substr(p+sep.size());
278
//--------------------------------------------------------------------------------------------------
280
// Splits s into tokens. The visible code treats text in single or double quotes as one token,
// skips white space after such a token and right-trims unquoted tokens.
// NOTE(review): most of the control flow (outer loop, switch/if on the current character, the
// use of "sep") is not visible in this listing. Confirm against the full file before editing.
std::vector<std::string> split_token_list(const std::string &s, int sep)
282
std::vector<std::string> parts;
285
// p is the start of the current token, pe the scan position, end the length of the input.
std::string::size_type end = s.size(), pe, p = 0;
298
// keep going until we find closing '
299
while (pe < end && !done)
304
if (pe < end && s[pe] == '\'')
315
parts.push_back(s.substr(p, pe-p));
318
// Skip white space that follows the quoted token.
while (p < end && (s[p] == ' ' || s[p] == '\t' || s[p] == '\r' || s[p] == '\n')) p++;
322
log_debug("Error splitting string list");
331
// keep going until we find closing "
332
while (pe < end && !done)
337
if (pe < end && s[pe] == '"')
348
parts.push_back(s.substr(p, pe-p));
351
// Skip white space that follows the quoted token.
while (p < end && (s[p] == ' ' || s[p] == '\t' || s[p] == '\r' || s[p] == '\n')) p++;
355
log_debug("Error splitting string list");
367
// skip until separator
375
parts.push_back(trim_right(s.substr(p, pe-p)));
385
//--------------------------------------------------------------------------------------------------
/**
 * Splits s at the first occurrence of sep.
 *
 * @param s The string to split.
 * @param sep The separator to look for.
 * @param left Receives the text before the separator, or all of s if sep was not found.
 * @param right Receives the text after the separator, or an empty string if sep was not found.
 * @return True if the separator was found, otherwise false.
 *
 * It is safe to pass the same string object for s and one of the output parameters.
 */
bool partition(const std::string &s, const std::string &sep, std::string &left, std::string &right)
{
  const std::string::size_type p = s.find(sep);
  if (p == std::string::npos)
  {
    // Take the copy first, as clearing right would also clear s if both are the same object.
    std::string whole(s);
    right.clear();
    left.swap(whole);
    return false;
  }

  // Compute both parts before touching the outputs. Assigning left first would change s
  // (and make the second substr() fail) if left and s are the same object.
  std::string head(s, 0, p);
  std::string tail(s, p + sep.size());
  left.swap(head);
  right.swap(tail);
  return true;
}
401
//--------------------------------------------------------------------------------------------------
404
* Returns a string containing all characters beginning at "start" from the given string "id", which form
405
* a valid, unqualified identifier. The returned identifier does not contain any quoting anymore.
406
* Note: this function is UTF-8 safe as it skips over all characters except some which are guaranteed
407
* not to be part of any valid UTF-8 sequence.
409
* @param id The string to examine.
410
* @param start The start position to search from.
412
* @result Returns the first found identifier starting at "start" or an empty string if nothing was
413
* found. Parameter "start" points to the first character after the found identifier.
415
// NOTE(review): the body of the scan loop (the switch on the current character and the places
// where token_end/i_end are changed) is not visible in this listing. Confirm against the full
// file before changing the code.
string get_identifier(const string& id, string::const_iterator& start)
417
string::const_iterator token_end= id.end();
418
bool is_symbol_quoted= false;
419
for (string::const_iterator i= start, i_end= token_end; i != i_end; ++i)
421
if (i_end != token_end)
426
if (!is_symbol_quoted)
430
if (!is_symbol_quoted)
441
is_symbol_quoted= true;
447
// Fewer than two characters cannot hold both an opening and a closing quote.
if (token_end - start < 2)
448
is_symbol_quoted= false;
449
string result(start, token_end);
451
// Drop the first and the last character (the quotes) of a quoted symbol.
if (is_symbol_quoted)
452
return result.substr(1, result.size() - 2);
457
//--------------------------------------------------------------------------------------------------
460
* Splits the given string into identifier parts assuming a format as allowed by the MySQL syntax for
461
* qualified identifiers, e.g. part1.part2.part3 (any of the parts might be quoted).
462
* In addition to the traditional syntax also these enhancements are supported:
463
* - Unlimited level of nesting.
464
* - Quoting might be done using single quotes, double quotes and back ticks.
466
* If an identifier is not separated by a dot from the rest of the input then this is considered
467
* invalid input and ignored. Only identifiers found until that syntax violation are returned.
469
// NOTE(review): the head of the do-loop and the statements between get_identifier() and
// push_back() are not visible in this listing. Confirm against the full file.
vector<string> split_qualified_identifier(const string& id)
471
vector<string> result;
472
string::const_iterator iterator= id.begin();
476
// get_identifier() receives the iterator by reference; per its description it is left on the
// first character after the identifier.
token= get_identifier(id, iterator);
479
result.push_back(token);
480
// Continue only while the character after an identifier is a dot; the dot itself is skipped.
} while ((iterator != id.end()) && (*iterator++ == '.'));
485
//--------------------------------------------------------------------------------------------------
/**
 * Removes the first path part from @path and returns this part as well as the shortened path.
 *
 * If @path contains no '/' or its only '/' position found is the last character, the entire
 * path is returned and @path is left empty.
 */
std::string pop_path_front(std::string &path)
{
  const std::string::size_type slash = path.find('/');
  const bool can_split = slash != std::string::npos && slash + 1 != path.length();

  std::string head;
  if (can_split)
  {
    head.assign(path, 0, slash);
    path.erase(0, slash + 1);
  }
  else
    head.swap(path); // Hands over the entire text and leaves path empty.

  return head;
}
505
//--------------------------------------------------------------------------------------------------
/**
 * Removes the last path part from @path and returns this part as well as the shortened path.
 *
 * If @path contains no '/' or its last '/' is the last character, the entire path is returned
 * and @path is left empty.
 */
std::string pop_path_back(std::string &path)
{
  const std::string::size_type slash = path.rfind('/');
  const bool can_split = slash != std::string::npos && slash + 1 != path.length();

  std::string tail;
  if (can_split)
  {
    tail.assign(path, slash + 1, std::string::npos);
    path.erase(slash);
  }
  else
    tail.swap(path); // Hands over the entire text and leaves path empty.

  return tail;
}
525
//--------------------------------------------------------------------------------------------------
528
* Helper routine to format a string into an STL string using the printf parameter syntax.
530
std::string strfmt(const char *fmt, ...)
537
tmp= g_strdup_vprintf(fmt, args);
547
//--------------------------------------------------------------------------------------------------
549
BASELIBRARY_PUBLIC_FUNC std::string sizefmt(int64_t s, bool metric)
561
unit = "iB"; // http://en.wikipedia.org/wiki/Binary_prefix
565
return strfmt("%iB", (int) s);
568
float value = s / one_kb;
570
return strfmt("%.02fK%s", value, unit);
575
return strfmt("%.02fM%s", value, unit);
580
return strfmt("%.02fG%s", value, unit);
585
return strfmt("%.02fT%s", value, unit);
587
return strfmt("%.02fP%s", value / one_kb, unit);
595
//--------------------------------------------------------------------------------------------------
/**
 * Removes white space from the start and/or the end of the given text.
 *
 * @param text The text to strip.
 * @param left If true, leading white space is removed.
 * @param right If true, trailing white space is removed.
 * @return The stripped copy of text. A text that consists only of white space yields an empty
 *         string whenever at least one of the two flags is set.
 */
std::string strip_text(const std::string &text, bool left, bool right)
{
  const std::locale loc;
  std::string::size_type first = 0;
  std::string::size_type last = text.size();

  if (left)
  {
    while (first < last && std::isspace(text[first], loc))
      ++first;
  }

  // Never move the right edge past the left one. The former iterator based code created a string
  // from a reversed range when the text contained only white space and both flags were set.
  if (right)
  {
    while (last > first && std::isspace(text[last - 1], loc))
      --last;
  }

  return text.substr(first, last - first);
}
614
//--------------------------------------------------------------------------------------------------
/**
 * Add the given extension to the filename, if necessary.
 *
 * @param filename The file name to check. Returned unchanged if it or the extension is empty.
 * @param extension The wanted extension, with or without leading dot.
 * @return The file name ending with the given extension. A different existing extension is replaced.
 */
std::string normalize_path_extension(std::string filename, std::string extension)
{
  if (extension.empty() || filename.empty())
    return filename;

  // Everything from the last dot on is the current extension...
  const std::string::size_type dot = filename.rfind('.');
  std::string current;
  if (dot != std::string::npos)
    current = filename.substr(dot);

  // ...unless that dot belongs to a directory name.
  if (current.find('/') != std::string::npos || current.find('\\') != std::string::npos)
    current.clear();

  if (extension[0] != '.')
    extension.insert(extension.begin(), '.');

  if (current.empty())
    filename += extension;
  else if (current != extension)
  {
    filename.erase(dot);
    filename += extension;
  }

  return filename;
}
645
* Removes all unnecessary path separators as well as "./" combinations.
646
* If there is a parent-dir entry (../) then this as well as the directly prefacing
647
* dir entry is removed.
649
// NOTE(review): this listing is missing lines (declaration/assignment of "result", the bodies of
// the conditions inside the loop). Confirm against the full file before changing the code.
std::string normalize_path(const std::string path)
651
// First convert all separators to the one that is used on the platform (no mix)
652
// and ease so at the same time further processing here.
654
std::string separator(1, G_DIR_SEPARATOR);
657
replace(result, "\\", separator);
658
replace(result, "/", separator);
660
// Collapse runs of separators into a single one.
std::string double_separator = separator + separator;
661
while (result.find(double_separator) != std::string::npos)
662
replace(result, double_separator, separator);
664
// Sanity check. Return *after* we have converted the slashes. This is part of the normalization.
665
if (result.size() < 2)
668
std::vector<std::string> parts= split(result, separator);
670
// Construct result backwards while examining the path parts.
672
// Number of ".." entries seen so far that still have to cancel out a directory entry.
int pending_count= 0;
673
for (int i= parts.size() - 1; i >= 0; i--)
675
if (parts[i].compare(".") == 0)
676
// References to the current directory can be removed without further change.
679
if (parts[i].compare("..") == 0)
681
// An entry that points back to the parent dir.
682
// Ignore this and keep track for later removal of the parent dir.
686
if (pending_count > 0)
688
// If this is a normal dir entry and we have pending parent-dir redirections
689
// then go one step up by removing (ignoring) this entry.
693
result = separator + parts[i] + result;
696
// Don't return the leading separator.
// NOTE(review): substr(1) throws std::out_of_range on an empty string. Check whether result can
// be empty here, e.g. when every part was cancelled out by ".." entries.
697
return result.substr(1);
700
std::string expand_tilde(const std::string &path)
702
if (!path.empty() && path[0] == '~' && (path.size() == 1 || path[1] == G_DIR_SEPARATOR))
704
const char *homedir = g_getenv("HOME");
706
homedir = g_get_home_dir();
708
return std::string(homedir).append(path.substr(1));
713
//--------------------------------------------------------------------------------------------------
/**
 * Tests if s begins with part.
 *
 * @return True if the first part.length() characters of s are equal to part. This is always
 *         the case for an empty part.
 */
bool starts_with(const std::string& s, const std::string& part)
{
  // compare() clamps the compared range to the length of s, just like substr() does.
  return s.compare(0, part.length(), part) == 0;
}
723
//--------------------------------------------------------------------------------------------------
/**
 * Replaces all occurrences of a string in another string, in place.
 *
 * @param value The string to modify.
 * @param search The text to look for. Nothing is changed if it is empty.
 * @param replacement The text to insert for each occurrence. Text inserted by a replacement
 *                    is not searched again.
 */
void replace(std::string& value, const std::string& search, const std::string& replacement)
{
  // An empty search string is found at every position, which made the loop below run forever.
  if (search.empty())
    return;

  std::string::size_type next = value.find(search);
  while (next != std::string::npos)
  {
    value.replace(next, search.length(), replacement);

    // Continue behind the inserted text so a replacement that contains the search string
    // does not trigger another replacement.
    next = value.find(search, next + replacement.length());
  }
}
736
//--------------------------------------------------------------------------------------------------
739
* Write text data to file, converting to \r\n if in Windows.
741
// NOTE(review): two alternative implementations are visible below (stdio based and
// g_file_set_contents based). Presumably they are selected by a platform #ifdef that is not part
// of this listing, as are the conditions in front of the throw statements - confirm.
// Both variants report a failure by throwing std::runtime_error.
void set_text_file_contents(const std::string &filename, const std::string &data)
744
// Opening a file in text mode will automatically convert \n to \r\n.
745
FILE *f = base_fopen(filename.c_str(), "w+t");
747
throw std::runtime_error(g_strerror(errno));
749
size_t bytes_written= fwrite(data.data(), 1, data.size(), f);
751
// Fewer bytes written than requested counts as an error.
if (bytes_written != data.size())
752
throw std::runtime_error(g_strerror(errno));
754
GError *error = NULL;
755
g_file_set_contents(filename.c_str(), data.data(), data.size(), &error);
758
// Copy the message so that it remains available for the exception.
std::string msg = error->message;
760
throw std::runtime_error(msg);
765
//--------------------------------------------------------------------------------------------------
768
* Read text data from file, converting to \n if necessary.
770
// NOTE(review): this listing is missing lines (declarations of buffer/c/text, the branches
// around the \r handling, closing the file, the return). Confirm against the full file.
// Failures are reported by throwing std::runtime_error.
std::string get_text_file_contents(const std::string &filename)
772
FILE *f = base_fopen(filename.c_str(), "r");
774
throw std::runtime_error(g_strerror(errno));
780
// The file is read in chunks of sizeof(buffer) bytes.
while ((c = fread(buffer, 1, sizeof(buffer), f)) > 0)
782
char *bufptr = buffer;
783
char *eobuf = buffer + c;
784
while (bufptr < eobuf)
786
// Look for the next carriage return in the not yet processed part of the chunk.
char *eol = (char*)memchr(bufptr, '\r', eobuf - bufptr);
789
// if \r is in string, we append everything up to it and then add \n
790
text.append(bufptr, eol-bufptr);
793
// NOTE(review): if the \r was the last byte of the chunk, bufptr may be at eobuf here - check
// how the missing lines guard this read.
if (*bufptr == '\n') // make sure it is \r\n and not only \r
798
// no \r found, append the whole thing and go for more
809
throw std::runtime_error(g_strerror(err));
817
//--------------------------------------------------------------------------------------------------
819
/** Escape a string to be used in a SQL query
820
* Same code as used by mysql. Handles null bytes in the middle of the string.
822
// NOTE(review): the switch statement and most of its cases are not visible in this listing.
// Confirm against the full file before changing the code.
std::string escape_sql_string(const std::string &s)
825
// The result is at least as long as the input; escaped characters add to that.
result.reserve(s.size());
827
for (std::string::const_iterator ch= s.begin(); ch != s.end(); ++ch)
833
case 0: /* Must be escaped for 'mysql' */
836
case '\n': /* Must be escaped for logs */
848
case '"': /* Better safe than sorry */
851
case '\032': /* This gives problems on Win32 */
857
// Escaped characters are written as a backslash followed by the value of "escape".
result.push_back('\\');
858
result.push_back(escape);
861
result.push_back(*ch);
866
//--------------------------------------------------------------------------------------------------
868
// NOTE: This is not the same as escape_sql_string, as embedded ` are escaped as ``, not \`
// NOTE(review): the switch statement and most of its cases are not visible in this listing.
// Confirm against the full file before changing the code.
869
std::string escape_backticks(const std::string &s)
872
// The result is at least as long as the input; escaped characters add to that.
result.reserve(s.size());
874
for (std::string::const_iterator ch= s.begin(); ch != s.end(); ++ch)
880
case 0: /* Must be escaped for 'mysql' */
883
case '\n': /* Must be escaped for logs */
895
case '"': /* Better safe than sorry */
898
case '\032': /* This gives problems on Win32 */
903
result.push_back('`');
908
// Escaped characters are written as a backslash followed by the value of "escape".
result.push_back('\\');
909
result.push_back(escape);
912
result.push_back(*ch);
917
//--------------------------------------------------------------------------------------------------
920
* Parses the given command line (which must be a usual mysql start command) and extracts the
921
* value for the given parameter. The function can only return options of the form "option-name = option-value"
922
* (both quoted and unquoted).
924
// NOTE(review): this listing is missing lines (declarations of result/terminator, loop bodies,
// braces, the return). Confirm against the full file before changing the code.
std::string extract_option_from_command_line(const std::string& option, const std::string &command_line)
927
// Only the first occurrence of the option name is considered.
size_t position = command_line.find(option);
928
if (position != std::string::npos)
930
position += option.size(); // Skip option name and find equal sign.
931
while (position < command_line.size() && command_line[position] != '=')
934
if (command_line[position] == '=')
938
// Skip any white space.
939
while (position < command_line.size() && command_line[position] == ' ')
943
// A quote character in front of the value becomes the terminator to search for.
if (command_line[position] == '"' || command_line[position] == '\'')
944
terminator = command_line[position++];
948
size_t end_position = command_line.find(terminator, position);
949
if (end_position == std::string::npos)
951
// Terminator not found means the string was either not properly terminated (if quoted)
952
// or contains no space char. In this case take everything we can get.
953
if (terminator != ' ')
955
result = command_line.substr(position);
958
result = command_line.substr(position, end_position - position);
964
//--------------------------------------------------------------------------------------------------
967
* Splits the given font description and returns its details in the provided fields.
969
* @return True if successful, otherwise false.
971
// NOTE(review): the parameter list continues on a line that is not part of this listing, as do
// several statements of the body. Confirm against the full file before changing the code.
bool parse_font_description(const std::string &fontspec, std::string &font, int &size, bool &bold,
974
// The description is examined word by word, starting from its end.
std::vector<std::string> parts= split(fontspec, " ");
983
// A trailing number is taken as the font size.
if (!parts.empty() && sscanf(parts.back().c_str(), "%i", &size) == 1)
986
// Up to two trailing style words are examined, compared case insensitively.
for (int i= 0; i < 2 && !parts.empty(); i++)
988
if (g_strcasecmp(parts.back().c_str(), "bold")==0)
994
if (g_strcasecmp(parts.back().c_str(), "italic")==0)
1004
// The parts from index 1 on are appended to the font name, separated by single spaces.
for (unsigned int i= 1; i < parts.size(); i++)
1005
font+= " " + parts[i];
1010
//--------------------------------------------------------------------------------------------------
/**
 * Removes a leading and a trailing quote character (double quote or back tick) from the
 * given identifier.
 *
 * The two ends are handled independently of each other: a quote character is also removed if
 * there is none at the other end or if the two differ.
 *
 * @param identifier The identifier to unquote. May be empty.
 * @return The identifier without the surrounding quote characters.
 */
std::string unquote_identifier(const std::string& identifier)
{
  // Without this check the test for the trailing quote reads the character at index -1.
  if (identifier.empty())
    return std::string();

  std::string::size_type first = 0;
  std::string::size_type last = identifier.size(); // One behind the last character to keep.

  if (identifier[first] == '"' || identifier[first] == '`')
    ++first;

  if (identifier[last - 1] == '"' || identifier[last - 1] == '`')
    --last;

  // A single quote character counts as both the leading and the trailing quote.
  if (last < first)
    return std::string();

  return identifier.substr(first, last - first);
}
1028
//--------------------------------------------------------------------------------------------------
/**
 * Returns the given identifier enclosed in the given quote character.
 * Quote characters within the identifier are not escaped.
 */
std::string quote_identifier(const std::string& identifier, const char quote_char)
{
  std::string quoted;
  quoted.reserve(identifier.size() + 2);
  quoted.push_back(quote_char);
  quoted.append(identifier);
  quoted.push_back(quote_char);
  return quoted;
}
1035
//--------------------------------------------------------------------------------------------------
/**
 * Quotes the given identifier, but only if it needs to be quoted.
 * http://dev.mysql.com/doc/refman/5.1/en/identifiers.html specifies what is allowed in unquoted identifiers.
 * Leading numbers are not strictly forbidden but discouraged as they may lead to ambiguous behavior.
 *
 * @param ident The identifier to examine.
 * @param quote_char The character to put in front of and behind the identifier.
 * @return The identifier as is if it consists only of ASCII letters, digits, '_', '$' and bytes
 *         above 0x7F, otherwise the identifier enclosed in the quote character.
 */
std::string quote_identifier_if_needed(const std::string &ident, const char quote_char)
{
  // Find the first character that is not allowed in an unquoted identifier.
  std::string::const_iterator it = ident.begin();
  for (; it != ident.end(); ++it)
  {
    const unsigned char ch = (unsigned char) *it;
    const bool is_letter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    const bool is_digit = ch >= '0' && ch <= '9';
    const bool is_plain = is_letter || is_digit || ch == '_' || ch == '$' || ch > 0x7F;
    if (!is_plain)
      break;
  }

  if (it == ident.end())
    return ident;

  return quote_char + ident + quote_char;
}
1059
//--------------------------------------------------------------------------------------------------
1061
EolHelpers::Eol_format EolHelpers::detect(const std::string &text)
1063
std::string::size_type pos= text.find_first_of("\r\n");
1064
if (std::string::npos == pos)
1065
return default_eol_format();
1066
if ('\r' == text[pos])
1067
return ('\n' == text[pos+1]) ? eol_crlf : eol_cr;
1072
int EolHelpers::count_lines(const std::string &text)
1074
Eol_format eol_format= detect(text);
1075
char eol_sym= (eol_cr == eol_format) ? '\r' : '\n';
1076
return std::count(text.begin(), text.end(), eol_sym);
1079
// Examines the line breaks of the given text, based on the format that detect() reports for it.
// NOTE(review): all return statements and the head of the do-loop are not visible in this
// listing, so the meaning of the result cannot be stated here. Confirm against the full file.
bool EolHelpers::check(const std::string &text)
1081
std::string::size_type pos= text.find_first_of("\n\r");
1082
if (std::string::npos == pos)
1084
Eol_format eol_format= detect(text);
1085
if (eol_lf == eol_format)
1087
// Look for a carriage return anywhere in a text detected as LF.
if (text.find("\r") != std::string::npos)
1090
else if (eol_cr == eol_format)
1092
// Look for a line feed anywhere in a text detected as CR.
if (text.find("\n") != std::string::npos)
1095
else if (eol_crlf == eol_format)
1099
// True for a line feed at pos or a character at pos that is not followed by a line feed.
if (('\n' == text[pos]) || ('\n' != text[pos+1]))
1103
pos= text.find_first_of("\n\r", pos);
1105
while (std::string::npos != pos);
1110
// Converts the line breaks of src_text from src_eol_format to dest_eol_format and stores the
// result in dest_text. Throws std::logic_error if both formats are the same.
// NOTE(review): this listing is missing lines (braces, the else between the two variants, the
// update of prev_pos). Confirm against the full file before changing the code.
void EolHelpers::conv(const std::string &src_text, Eol_format src_eol_format, std::string &dest_text, Eol_format dest_eol_format)
1112
if (src_eol_format == dest_eol_format)
1113
throw std::logic_error("source and target line ending formats coincide, no need to convert");
1115
const std::string &src_eol= eol(src_eol_format);
1116
const std::string &dest_eol= eol(dest_eol_format);
1117
std::string::size_type src_eol_length= src_eol.size();
1119
// Line breaks of different length: the result is assembled piece by piece.
if (dest_eol.size() != src_eol.size())
1122
// Reserve the final size up front to avoid reallocations while appending.
int line_count= count_lines(src_text);
1123
size_t dest_size= src_text.size() + line_count * (dest_eol.size() - src_eol.size());
1124
dest_text.reserve(dest_size);
1125
std::string::size_type prev_pos= 0;
1126
std::string::size_type pos= 0;
1127
while ((pos= src_text.find(src_eol, pos)) != std::string::npos)
1129
dest_text.append(src_text, prev_pos, pos-prev_pos).append(dest_eol);
1130
pos+= src_eol_length;
1133
// Append the text that follows the last line break.
dest_text.append(src_text, prev_pos, std::string::npos);
1137
// Variant working on a copy of the source text, replacing the line breaks in place.
dest_text= src_text;
1138
std::string::size_type pos= 0;
1139
while ((pos= dest_text.find(src_eol, pos)) != std::string::npos)
1141
dest_text.replace(pos, src_eol_length, dest_eol);
1142
pos+= src_eol_length;
1147
// Copies src_text to dest_text, writing every line break found (\r, \n or \r\n) in the given
// format.
// NOTE(review): this listing is missing lines (braces, the counting of crlf_count, the else
// branch for the other formats, the advancing of pos/prev_pos). Confirm against the full file.
void EolHelpers::fix(const std::string &src_text, std::string &dest_text, Eol_format eol_format)
1149
const std::string &dest_eol= eol(eol_format);
1150
std::string::size_type dest_eol_length= dest_eol.size();
1153
if (eol_crlf == eol_format)
1155
int cr_count= std::count(src_text.begin(), src_text.end(), '\r');
1156
int lf_count= std::count(src_text.begin(), src_text.end(), '\n');
1159
// Walk over all occurrences of the target line break in the source text.
std::string::size_type pos= 0;
1160
while ((pos= src_text.find(dest_eol, pos)) != std::string::npos)
1163
pos+= dest_eol_length;
1166
// Size estimate for the result: the source size plus the number of \r and \n characters that
// exceed crlf_count.
size_t dest_size= src_text.size() + (cr_count - crlf_count) + (lf_count - crlf_count);
1167
dest_text.reserve(dest_size);
1170
std::string::size_type prev_pos= 0;
1171
std::string::size_type pos= 0;
1172
std::string crlf= "\r\n";
1173
// find_first_of() stops at any \r or \n, not only at the sequence \r\n.
while ((pos= src_text.find_first_of(crlf, pos)) != std::string::npos)
1175
dest_text.append(src_text, prev_pos, pos-prev_pos).append(dest_eol);
1176
// Recognizes a \r that is directly followed by \n.
if (('\r' == src_text[pos]) && ('\n' == src_text[pos+1]))
1181
// Append the text that follows the last line break.
dest_text.append(src_text, prev_pos, std::string::npos);
1184
//--------------------------------------------------------------------------------------------------