2
* Copyright 2010 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: jmarantz@google.com (Joshua Marantz)
19
#ifndef PAGESPEED_KERNEL_BASE_STRING_UTIL_H_
20
#define PAGESPEED_KERNEL_BASE_STRING_UTIL_H_
22
#include <cctype> // for isascii
28
#include "base/logging.h"
29
#include "pagespeed/kernel/base/basictypes.h"
30
#include "pagespeed/kernel/base/string.h"
33
#include <cstdlib> // NOLINT
34
#include <string> // NOLINT
35
#if !defined(CHROMIUM_REVISION) || CHROMIUM_REVISION >= 205050
36
# include "base/strings/string_number_conversions.h"
37
# include "base/strings/string_piece.h"
38
# include "base/strings/string_util.h"
39
# include "base/strings/stringprintf.h"
41
# include "base/string_number_conversions.h"
42
# include "base/string_piece.h"
43
# include "base/string_util.h"
44
# include "base/stringprintf.h"
47
// Make the base:: string-formatting helpers and StringPiece usable without
// qualification.  These using-declarations appear before namespace
// net_instaweb is opened.
using base::StringAppendF;
48
using base::StringAppendV;
49
using base::SStringPrintf;
50
using base::StringPiece;
51
using base::StringPrintf;
53
// Alias for StringPiece::size_type; used below as the return type of
// FindIgnoreCase.
typedef StringPiece::size_type stringpiece_ssize_type;
55
// Quick macro to get the size of a static char[] without trailing '\0'.
56
// Note: Cannot be used for char*, std::string, etc.
57
// Expands to arraysize(static_string) - 1; the "- 1" is what drops the
// trailing '\0'.  arraysize itself is not defined in this header.
#define STATIC_STRLEN(static_string) (arraysize(static_string) - 1)
59
namespace net_instaweb {
61
// Forward declaration so that StringSetInsensitive below can name the
// comparator; the struct itself is defined later in this header.
struct StringCompareInsensitive;
63
// Container aliases built on GoogleString and StringPiece.
typedef std::map<GoogleString, GoogleString> StringStringMap;
64
typedef std::map<GoogleString, int> StringIntMap;
65
typedef std::set<GoogleString> StringSet;
66
// Set of strings ordered by StringCompareInsensitive.
typedef std::set<GoogleString, StringCompareInsensitive> StringSetInsensitive;
67
typedef std::vector<GoogleString> StringVector;
68
typedef std::vector<StringPiece> StringPieceVector;
69
// The following vectors hold raw pointers; the types say nothing about who
// owns the pointed-to data.
typedef std::vector<const GoogleString*> ConstStringStarVector;
70
typedef std::vector<GoogleString*> StringStarVector;
71
typedef std::vector<const char*> CharStarVector;
73
// Converts a signed int to a GoogleString by delegating to base::IntToString.
inline GoogleString IntegerToString(int i) {
74
return base::IntToString(i);
77
// Converts an unsigned int to a GoogleString by delegating to
// base::UintToString.
inline GoogleString UintToString(unsigned int i) {
78
return base::UintToString(i);
81
// Converts an int64 to a GoogleString by delegating to base::Int64ToString.
inline GoogleString Integer64ToString(int64 i) {
82
return base::Int64ToString(i);
85
// Formats a pointer value as text using the printf-style "%p" conversion of
// StringPrintf.  The pointer is only formatted, never dereferenced.
inline GoogleString PointerToString(void* pointer) {
86
return StringPrintf("%p", pointer);
89
// Parses the C string 'in' as an int into *out by delegating to
// base::StringToInt, and returns that function's result.
// NOTE: For a string of the form "45x", this sets *out = 45 but returns false.
90
// It sets *out = 0 given "Junk45" or "".
91
inline bool StringToInt(const char* in, int* out) {
92
return base::StringToInt(in, out);
95
// 64-bit counterpart of StringToInt(const char*, int*): parses 'in' into *out
// by delegating to base::StringToInt64 and returns that function's result.
inline bool StringToInt64(const char* in, int64* out) {
96
return base::StringToInt64(in, out);
99
// Overload taking a GoogleString: parses 'in' into *out by delegating to
// base::StringToInt and returns that function's result.
inline bool StringToInt(const GoogleString& in, int* out) {
100
return base::StringToInt(in, out);
103
// Overload taking a GoogleString: parses 'in' into *out by delegating to
// base::StringToInt64 and returns that function's result.
inline bool StringToInt64(const GoogleString& in, int64* out) {
104
return base::StringToInt64(in, out);
108
// Returns the part of the piece after the first '=', trimming any
109
// white space found at the beginning or end of the resulting piece.
110
// Returns an empty string if '=' was not found.
111
StringPiece PieceAfterEquals(const StringPiece& piece);
114
// StrCat overloads taking from two to nine StringPiece arguments and
// returning a GoogleString.  Only the declarations live here.
// NOTE(review): presumably the result is the arguments concatenated in
// order -- confirm against the definitions.
GoogleString StrCat(const StringPiece& a, const StringPiece& b);
115
GoogleString StrCat(const StringPiece& a, const StringPiece& b,
116
const StringPiece& c);
117
GoogleString StrCat(const StringPiece& a, const StringPiece& b,
118
const StringPiece& c, const StringPiece& d);
119
GoogleString StrCat(const StringPiece& a, const StringPiece& b,
120
const StringPiece& c, const StringPiece& d,
121
const StringPiece& e);
122
GoogleString StrCat(const StringPiece& a, const StringPiece& b,
123
const StringPiece& c, const StringPiece& d,
124
const StringPiece& e, const StringPiece& f);
125
GoogleString StrCat(const StringPiece& a, const StringPiece& b,
126
const StringPiece& c, const StringPiece& d,
127
const StringPiece& e, const StringPiece& f,
128
const StringPiece& g);
129
GoogleString StrCat(const StringPiece& a, const StringPiece& b,
130
const StringPiece& c, const StringPiece& d,
131
const StringPiece& e, const StringPiece& f,
132
const StringPiece& g, const StringPiece& h);
133
GoogleString StrCat(const StringPiece& a, const StringPiece& b,
134
const StringPiece& c, const StringPiece& d,
135
const StringPiece& e, const StringPiece& f,
136
const StringPiece& g, const StringPiece& h,
137
const StringPiece& i);
139
// Single-piece StrAppend: appends 'a' to *target via
// StringPiece::AppendToString.
inline void StrAppend(GoogleString* target, const StringPiece& a) {
140
a.AppendToString(target);
142
// StrAppend overloads taking a target string plus two to nine StringPiece
// arguments.  Only the declarations live here.
// NOTE(review): presumably the pieces are appended to *target in argument
// order -- confirm against the definitions.
void StrAppend(GoogleString* target,
143
const StringPiece& a, const StringPiece& b);
144
void StrAppend(GoogleString* target,
145
const StringPiece& a, const StringPiece& b,
146
const StringPiece& c);
147
void StrAppend(GoogleString* target,
148
const StringPiece& a, const StringPiece& b,
149
const StringPiece& c, const StringPiece& d);
150
void StrAppend(GoogleString* target,
151
const StringPiece& a, const StringPiece& b,
152
const StringPiece& c, const StringPiece& d,
153
const StringPiece& e);
154
void StrAppend(GoogleString* target,
155
const StringPiece& a, const StringPiece& b,
156
const StringPiece& c, const StringPiece& d,
157
const StringPiece& e, const StringPiece& f);
158
void StrAppend(GoogleString* target,
159
const StringPiece& a, const StringPiece& b,
160
const StringPiece& c, const StringPiece& d,
161
const StringPiece& e, const StringPiece& f,
162
const StringPiece& g);
163
void StrAppend(GoogleString* target,
164
const StringPiece& a, const StringPiece& b,
165
const StringPiece& c, const StringPiece& d,
166
const StringPiece& e, const StringPiece& f,
167
const StringPiece& g, const StringPiece& h);
168
void StrAppend(GoogleString* target,
169
const StringPiece& a, const StringPiece& b,
170
const StringPiece& c, const StringPiece& d,
171
const StringPiece& e, const StringPiece& f,
172
const StringPiece& g, const StringPiece& h,
173
const StringPiece& i);
175
// Split sp into pieces that are separated by any character in the given string
176
// of separators, and push those pieces in order onto components.
177
void SplitStringPieceToVector(const StringPiece& sp,
178
const StringPiece& separators,
179
StringPieceVector* components,
180
bool omit_empty_strings);
182
// Splits string 'full' using substr by searching it incrementally from
183
// left. Empty tokens are removed from the final result.
184
void SplitStringUsingSubstr(const StringPiece& full,
185
const StringPiece& substr,
186
StringPieceVector* result);
188
void BackslashEscape(const StringPiece& src,
189
const StringPiece& to_escape,
192
// Takes a StringPiece and returns a new GoogleString.
// NOTE(review): presumably the result is 'src' with C-style escape sequences
// applied -- confirm against the definition.
GoogleString CEscape(const StringPiece& src);
194
// TODO(jmarantz): Eliminate these definitions of HasPrefixString,
195
// UpperString, and LowerString, and re-add dependency on protobufs
196
// which also provide definitions for these.
198
// NOTE(review): presumably returns true when 'str' begins with 'prefix' --
// confirm against the definition.
bool HasPrefixString(const StringPiece& str, const StringPiece& prefix);
200
// NOTE(review): presumably upper-cases *str in place -- confirm.
void UpperString(GoogleString* str);
202
// NOTE(review): presumably lower-cases *str in place -- confirm.
void LowerString(GoogleString* str);
204
// Returns the result of ContainsOnlyWhitespaceASCII(str).
inline bool OnlyWhitespace(const GoogleString& str) {
205
return ContainsOnlyWhitespaceASCII(str);
208
// Replaces all instances of 'substring' in 's' with 'replacement'.
209
// Returns the number of instances replaced. Replacements are not
210
// subject to re-matching.
212
// NOTE: The string pieces must not overlap 's'.
213
int GlobalReplaceSubstring(const StringPiece& substring,
214
const StringPiece& replacement,
217
// Returns the index of the start of needle in haystack, or
218
// StringPiece::npos if it's not present.
219
stringpiece_ssize_type FindIgnoreCase(StringPiece haystack, StringPiece needle);
222
// Output a string which is the combination of all values in vector, separated
223
// by delim. Does not ignore empty strings in vector. So:
224
// JoinStringStar({"foo", "", "bar"}, ", ") == "foo, , bar". (Pseudocode)
225
GoogleString JoinStringStar(const ConstStringStarVector& vector,
226
const StringPiece& delim);
228
// See also: ./src/third_party/css_parser/src/strings/ascii_ctype.h
229
// We probably don't want our core string header file to have a
230
// dependency on the Google CSS parser, so for now we'll write this here:
232
// Upper-case a single character and return it.
233
// toupper() changes based on locale. We don't want this!
234
// Only characters in the range 'a'..'z' enter the conversion branch guarded
// by the explicit range test below.
inline char UpperChar(char c) {
235
if ((c >= 'a') && (c <= 'z')) {
241
// Lower-case a single character and return it.
242
// tolower() changes based on locale. We don't want this!
243
// Only characters in the range 'A'..'Z' enter the conversion branch guarded
// by the explicit range test below.
inline char LowerChar(char c) {
244
if ((c >= 'A') && (c <= 'Z')) {
250
// Check if given character is an HTML (or CSS) space (not the same as isspace,
// and not locale-dependent!).  Note in particular that isspace always includes
// '\v' and HTML does not.  See:
//    http://www.whatwg.org/specs/web-apps/current-work/multipage/common-microsyntaxes.html#space-character
//    http://www.w3.org/TR/CSS21/grammar.html
//
// Returns true exactly for ' ', '\t', '\r', '\n' and '\f'.  The return type is
// bool because this is a predicate; it was previously declared as char, which
// yielded the same 0/1 values but mis-stated the intent and could select the
// wrong overload (e.g. when streaming the result).
inline bool IsHtmlSpace(char c) {
  return (c == ' ') || (c == '\t') || (c == '\r') || (c == '\n') || (c == '\f');
}
259
// Thin wrapper that forwards to base::strdup and returns its result.
// NOTE(review): presumably the caller owns the returned buffer -- confirm
// against base::strdup's contract.
inline char* strdup(const char* str) {
260
return base::strdup(str);
263
// Case-insensitive string comparison that is locale-independent.
264
int StringCaseCompare(const StringPiece& s1, const StringPiece& s2);
266
// Determines whether the character is a US Ascii number or letter. This
267
// is preferable to isalnum() for working with computer languages, as
268
// opposed to human languages.
269
// Implemented as three explicit range tests ('a'..'z', 'A'..'Z', '0'..'9'),
// so no <cctype> classification function is consulted.
inline bool IsAsciiAlphaNumeric(char ch) {
270
return (((ch >= 'a') && (ch <= 'z')) ||
271
((ch >= 'A') && (ch <= 'Z')) ||
272
((ch >= '0') && (ch <= '9')));
275
// Convenience functions.
276
// Returns true iff c is one of '0'..'9', 'A'..'F' or 'a'..'f'; both upper-
// and lower-case hex letters are accepted.
inline bool IsHexDigit(char c) {
277
return ('0' <= c && c <= '9') ||
278
('A' <= c && c <= 'F') ||
279
('a' <= c && c <= 'f');
282
// In-place removal of leading and trailing HTML whitespace. Returns true if
283
// any whitespace was trimmed.
284
bool TrimWhitespace(StringPiece* str);
286
// In-place removal of leading and trailing quote. Removes whitespace as well.
287
void TrimQuote(StringPiece* str);
289
// In-place removal of multiple levels of leading and trailing quotes,
290
// including url-escaped quotes, optionally backslashed. Removes
291
// whitespace as well.
292
void TrimUrlQuotes(StringPiece* str);
294
// Trims leading HTML whitespace. Returns true if any whitespace was trimmed.
295
bool TrimLeadingWhitespace(StringPiece* str);
297
// Trims trailing HTML whitespace. Returns true if any whitespace was trimmed.
298
bool TrimTrailingWhitespace(StringPiece* str);
300
// Non-destructive TrimWhitespace.
301
// WARNING: in should not point inside output!
// Makes a mutable StringPiece copy of 'in', trims that copy with
// TrimWhitespace(StringPiece*), and stores the result in *output via
// CopyToString.
302
inline void TrimWhitespace(const StringPiece& in, GoogleString* output) {
303
// Check that in.data() lies outside output's current buffer
// [output->data(), output->data() + output->length()).
DCHECK((in.data() < output->data()) ||
304
(in.data() >= (output->data() + output->length())))
305
<< "Illegal argument aliasing in TrimWhitespace";
306
StringPiece temp(in); // Mutable copy
307
TrimWhitespace(&temp); // Modifies temp
308
temp.CopyToString(output);
311
// Accumulates a decimal value from 'c' into *value.
312
// Returns false and leaves *value unchanged if c is not a decimal digit.
313
bool AccumulateDecimalValue(char c, uint32* value);
315
// Accumulates a hex value from 'c' into *value
316
// Returns false and leaves *value unchanged if c is not a hex digit.
317
bool AccumulateHexValue(char c, uint32* value);
319
// Return true iff the two strings are equal, ignoring case.
320
bool MemCaseEqual(const char* s1, size_t size1, const char* s2, size_t size2);
321
// StringPiece convenience wrapper around MemCaseEqual: passes each piece's
// data() and size() and returns MemCaseEqual's result.
inline bool StringCaseEqual(const StringPiece& s1, const StringPiece& s2) {
322
return MemCaseEqual(s1.data(), s1.size(), s2.data(), s2.size());
325
// Return true iff str starts with prefix, ignoring case.
326
bool StringCaseStartsWith(const StringPiece& str, const StringPiece& prefix);
327
// Return true iff str ends with suffix, ignoring case.
328
bool StringCaseEndsWith(const StringPiece& str, const StringPiece& suffix);
330
// Return true if str is equal to the concatenation of first and second. Note
331
// that this respects case.
332
bool StringEqualConcat(const StringPiece& str, const StringPiece& first,
333
const StringPiece& second);
335
// Return the number of mismatched chars in two strings. Useful for string
336
// comparisons without short-circuiting to prevent timing attacks.
337
// See http://codahale.com/a-lesson-in-timing-attacks/
338
int CountCharacterMismatches(StringPiece s1, StringPiece s2);
340
// Comparison functor over C strings: returns true when
// StringCaseCompare(s1, s2) is negative.
struct CharStarCompareInsensitive {
341
bool operator()(const char* s1, const char* s2) const {
342
return (StringCaseCompare(s1, s2) < 0);
346
// Comparison functor over C strings: returns true when strcmp(s1, s2) is
// negative.  Both pointers are handed straight to strcmp.
struct CharStarCompareSensitive {
347
bool operator()(const char* s1, const char* s2) const {
348
return (strcmp(s1, s2) < 0);
352
// Comparison functor over StringPieces.
// NOTE(review): case-sensitive going by its name -- confirm against the
// comparison performed in operator().
struct StringCompareSensitive {
353
bool operator()(const StringPiece& s1, const StringPiece& s2) const {
358
// Comparison functor over StringPieces: returns true when
// StringCaseCompare(s1, s2) is negative.  Used as the ordering of
// StringSetInsensitive.
struct StringCompareInsensitive {
359
bool operator()(const StringPiece& s1, const StringPiece& s2) const {
360
return (StringCaseCompare(s1, s2) < 0);
364
// Parse a list of integers into a vector. Empty values are ignored.
365
// Returns true if all non-empty values are converted into integers.
366
bool SplitStringPieceToIntegerVector(
367
const StringPiece& src, const StringPiece& separators,
368
std::vector<int>* ints);
370
// Does a path end in slash?
371
// Returns the result of path.ends_with("/").
inline bool EndsInSlash(const StringPiece& path) {
372
return path.ends_with("/");
375
// Make sure directory's path ends in '/'.
376
// Takes action only when EndsInSlash(*dir) is false.
inline void EnsureEndsInSlash(GoogleString* dir) {
377
if (!EndsInSlash(*dir)) {
382
// Given a string such as: a b "c d" e 'f g'
383
// Parse it into a vector: ["a", "b", "c d", "e", "f g"]
384
// NOTE: actually used for html doctype recognition,
385
// so assumes HtmlSpace separation.
386
void ParseShellLikeString(const StringPiece& input,
387
std::vector<GoogleString>* output);
389
// Counts the number of times that substring appears in text
390
// Note: for a substring that can overlap itself, it counts not necessarily
391
// disjoint occurrences of the substring.
392
// For example: "aaa" appears in "aaaaa" 3 times, not once
393
int CountSubstring(const StringPiece& text, const StringPiece& substring);
395
// Appends new empty string to a StringVector and returns a pointer to it.
396
// The new element is a default-constructed GoogleString pushed onto the back
// of *v.
inline GoogleString* StringVectorAdd(StringVector* v) {
397
v->push_back(GoogleString());
401
// Append string-like objects accessed through an iterator.
// Elements of [start, end) must provide size() (used for the length tally)
// and be accepted as an argument by StrAppend, which adds them to *dest.
403
void AppendJoinIterator(
404
GoogleString* dest, I start, I end, StringPiece sep) {
406
// Skip a lot of set-up and tear-down in empty case.
409
// First pass: tally the final length, starting from dest's current size and
// adding each element's size plus a separator before every element except
// the first.
size_t size = dest->size();
410
size_t sep_size = 0; // No separator before initial element
411
for (I str = start; str != end; ++str) {
412
size += str->size() + sep_size;
413
sep_size = sep.size();
416
// Second pass: append each element preceded by to_prepend, which starts out
// empty.
StringPiece to_prepend("");
417
for (I str = start; str != end; ++str) {
418
StrAppend(dest, to_prepend, *str);
423
// Append an arbitrary iterable collection of strings such as a StringSet,
424
// StringVector, or StringPieceVector, to *dest, with the given separator.
425
// Implemented by passing collection.begin()/end() to AppendJoinIterator.
// (The signature has no initial/final string parameters.)
// Argument order chosen to be consistent
428
void AppendJoinCollection(
429
GoogleString* dest, const C& collection, StringPiece sep) {
430
AppendJoinIterator(dest, collection.begin(), collection.end(), sep);
434
// Joins 'collection' with 'sep' by delegating to AppendJoinCollection, which
// writes into 'result'.  Returns a GoogleString.
GoogleString JoinCollection(const C& collection, StringPiece sep) {
436
AppendJoinCollection(&result, collection, sep);
440
// Converts a boolean to string.
441
// Returns a pointer to the string literal "true" or "false"; the caller must
// not modify or free it.
inline const char* BoolToString(bool b) {
442
return (b ? "true" : "false");
445
// Returns true iff c is a 7-bit ASCII character (0x00-0x7F).
// char may be signed, so c is converted to unsigned char before the range
// test.  The test is written out instead of calling isascii(): that function
// is a POSIX extension rather than part of the ISO C++ <cctype> interface, so
// relying on it is not portable.  The result is the same for every char value.
inline bool IsAscii(char c) {
  return static_cast<unsigned char>(c) <= 0x7F;
}
450
// Tests if c is a standard (non-control) ASCII char 0x20-0x7E.
451
// Note: This does not include TAB (0x09), LF (0x0A) or CR (0x0D).
452
// Both bounds are inclusive.
inline bool IsNonControlAscii(char c) {
453
return ('\x20' <= c) && (c <= '\x7E');
457
} // namespace net_instaweb
459
#endif // PAGESPEED_KERNEL_BASE_STRING_UTIL_H_