2
Copyright 2015 The Camlistore Authors
4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
8
http://www.apache.org/licenses/LICENSE-2.0
10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
25
var htmlEscaper = New(
33
var htmlUnescaper = New(
41
// capitalLetters is built from the old/new pairs "a"->"A" and "b"->"B".
// TestReplacer expects it to turn "brad" into "BrAd", and the ByteByte
// benchmarks below reuse it.
var capitalLetters = New("a", "A", "b", "B")
43
func TestReplacer(t *testing.T) {
44
type testCase struct {
48
var testCases []testCase
50
// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
51
str := func(b byte) string {
52
return string([]byte{b})
56
// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
58
for i := 0; i < 256; i++ {
59
s = append(s, str(byte(i)), str(byte(i+1)))
63
// Test cases with 1-byte old strings, 1-byte new strings.
64
testCases = append(testCases,
65
testCase{capitalLetters, "brad", "BrAd"},
66
testCase{capitalLetters, strings.Repeat("a", (32<<10)+123), strings.Repeat("A", (32<<10)+123)},
67
testCase{capitalLetters, "", ""},
69
testCase{inc, "brad", "csbe"},
70
testCase{inc, "\x00\xff", "\x01\x00"},
71
testCase{inc, "", ""},
73
testCase{New("a", "1", "a", "2"), "brad", "br1d"},
76
// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
78
for i := 0; i < 256; i++ {
83
s = append(s, str(byte(i)), strings.Repeat(str(byte(i)), n))
87
// Test cases with 1-byte old strings, variable length new strings.
88
testCases = append(testCases,
89
testCase{htmlEscaper, "No changes", "No changes"},
90
testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"},
91
testCase{htmlEscaper, "&&&", "&&&"},
92
testCase{htmlEscaper, "", ""},
94
testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
95
testCase{repeat, "abba", "abbbba"},
96
testCase{repeat, "", ""},
98
testCase{New("a", "11", "a", "22"), "brad", "br11d"},
101
// The remaining test cases have variable length old strings.
103
testCases = append(testCases,
104
testCase{htmlUnescaper, "&amp;", "&"},
105
testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"},
106
testCase{htmlUnescaper, "", ""},
108
testCase{New("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
110
testCase{New("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
112
testCase{New("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
115
// gen1 has multiple old strings of variable length. There is no
116
// overall non-empty common prefix, but some pairwise common prefixes.
122
"longerst", "most long",
130
testCases = append(testCases,
131
testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
132
testCase{gen1, "long, longerst, longer", "short, most long, medium"},
133
testCase{gen1, "xxxxx", "xxxxX"},
134
testCase{gen1, "XiX", "YiY"},
135
testCase{gen1, "", ""},
138
// gen2 has multiple old strings with no pairwise common prefix.
144
testCases = append(testCases,
145
testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
146
testCase{gen2, "", ""},
149
// gen3 has multiple old strings with an overall common prefix.
151
"abracadabra", "poof",
152
"abracadabrakazam", "splat",
153
"abraham", "lincoln",
154
"abrasion", "scrape",
157
testCases = append(testCases,
158
testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
159
testCase{gen3, "abrasion abracad", "scrape abracad"},
160
testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
161
testCase{gen3, "", ""},
164
// foo{1,2,3,4} have multiple old strings with an overall common prefix
165
// and 1- or 2- byte extensions from the common prefix.
187
testCases = append(testCases,
188
testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
189
testCase{foo1, "", ""},
191
testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
192
testCase{foo2, "", ""},
194
testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
195
testCase{foo3, "", ""},
197
testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
198
testCase{foo4, "", ""},
201
// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
202
allBytes := make([]byte, 256)
203
for i := range allBytes {
204
allBytes[i] = byte(i)
206
allString := string(allBytes)
212
testCases = append(testCases,
213
testCase{genAll, allString, "[all]"},
214
testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
215
testCase{genAll, "", ""},
218
// Test cases with empty old strings.
220
blankToX1 := New("", "X")
221
blankToX2 := New("", "X", "", "")
222
blankHighPriority := New("", "X", "o", "O")
223
blankLowPriority := New("o", "O", "", "X")
224
blankNoOp1 := New("", "")
225
blankNoOp2 := New("", "", "", "A")
226
blankFoo := New("", "X", "foobar", "R", "foobaz", "Z")
227
testCases = append(testCases,
228
testCase{blankToX1, "foo", "XfXoXoX"},
229
testCase{blankToX1, "", "X"},
231
testCase{blankToX2, "foo", "XfXoXoX"},
232
testCase{blankToX2, "", "X"},
234
testCase{blankHighPriority, "oo", "XOXOX"},
235
testCase{blankHighPriority, "ii", "XiXiX"},
236
testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
237
testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
238
testCase{blankHighPriority, "", "X"},
240
testCase{blankLowPriority, "oo", "OOX"},
241
testCase{blankLowPriority, "ii", "XiXiX"},
242
testCase{blankLowPriority, "oiio", "OXiXiOX"},
243
testCase{blankLowPriority, "iooi", "XiOOXiX"},
244
testCase{blankLowPriority, "", "X"},
246
testCase{blankNoOp1, "foo", "foo"},
247
testCase{blankNoOp1, "", ""},
249
testCase{blankNoOp2, "foo", "foo"},
250
testCase{blankNoOp2, "", ""},
252
testCase{blankFoo, "foobarfoobaz", "XRXZX"},
253
testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
254
testCase{blankFoo, "", "X"},
257
// single string replacer
259
abcMatcher := New("abc", "[match]")
261
testCases = append(testCases,
262
testCase{abcMatcher, "", ""},
263
testCase{abcMatcher, "ab", "ab"},
264
testCase{abcMatcher, "abc", "[match]"},
265
testCase{abcMatcher, "abcd", "[match]d"},
266
testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
269
// Issue 6659 cases (more single string replacer)
271
noHello := New("Hello", "")
272
testCases = append(testCases,
273
testCase{noHello, "Hello", ""},
274
testCase{noHello, "Hellox", "x"},
275
testCase{noHello, "xHello", "x"},
276
testCase{noHello, "xHellox", "xx"},
279
// No-arg test cases.
282
testCases = append(testCases,
283
testCase{nop, "abc", "abc"},
284
testCase{nop, "", ""},
287
// Run the test cases.
289
for i, tc := range testCases {
291
// Replace with len(in) == cap(in)
292
in := make([]byte, len(tc.in))
294
if s := string(tc.r.Replace(in)); s != tc.out {
295
t.Errorf("%d. Replace(%q /* len == cap */) = %q, want %q", i, tc.in, s, tc.out)
300
// Replace with len(in) < cap(in)
301
in := make([]byte, len(tc.in), len(tc.in)*2)
303
if s := string(tc.r.Replace(in)); s != tc.out {
304
t.Errorf("%d. Replace(%q /* len < cap */) = %q, want %q", i, tc.in, s, tc.out)
310
func BenchmarkGenericNoMatch(b *testing.B) {
311
str := []byte(strings.Repeat("A", 100) + strings.Repeat("B", 100))
312
generic := New("a", "A", "b", "B", "12", "123") // varying lengths forces generic
313
for i := 0; i < b.N; i++ {
318
func BenchmarkGenericMatch1(b *testing.B) {
319
str := []byte(strings.Repeat("a", 100) + strings.Repeat("b", 100))
320
generic := New("a", "A", "b", "B", "12", "123")
321
for i := 0; i < b.N; i++ {
326
func BenchmarkGenericMatch2(b *testing.B) {
327
str := bytes.Repeat([]byte("It's <b>HTML</b>!"), 100)
328
for i := 0; i < b.N; i++ {
329
htmlUnescaper.Replace(str)
333
func benchmarkSingleString(b *testing.B, pattern, text string) {
334
r := New(pattern, "[match]")
335
buf := make([]byte, len(text), len(text)*7)
336
b.SetBytes(int64(len(text)))
338
for i := 0; i < b.N; i++ {
344
func BenchmarkSingleMaxSkipping(b *testing.B) {
345
benchmarkSingleString(b, strings.Repeat("b", 25), strings.Repeat("a", 10000))
348
func BenchmarkSingleLongSuffixFail(b *testing.B) {
349
benchmarkSingleString(b, "b"+strings.Repeat("a", 500), strings.Repeat("a", 1002))
352
func BenchmarkSingleMatch(b *testing.B) {
353
benchmarkSingleString(b, "abcdef", strings.Repeat("abcdefghijklmno", 1000))
356
func benchmarkReplacer(b *testing.B, r *Replacer, str string) {
357
buf := make([]byte, len(str))
359
for i := 0; i < b.N; i++ {
365
func BenchmarkByteByteNoMatch(b *testing.B) {
366
benchmarkReplacer(b, capitalLetters, strings.Repeat("A", 100)+strings.Repeat("B", 100))
369
func BenchmarkByteByteMatch(b *testing.B) {
370
benchmarkReplacer(b, capitalLetters, strings.Repeat("a", 100)+strings.Repeat("b", 100))
373
func BenchmarkByteStringMatch(b *testing.B) {
374
benchmarkReplacer(b, htmlEscaper, "<"+strings.Repeat("a", 99)+strings.Repeat("b", 99)+">")
377
func BenchmarkHTMLEscapeNew(b *testing.B) {
378
benchmarkReplacer(b, htmlEscaper, "I <3 to escape HTML & other text too.")
381
func BenchmarkHTMLEscapeOld(b *testing.B) {
382
str := "I <3 to escape HTML & other text too."
383
buf := make([]byte, len(str))
384
for i := 0; i < b.N; i++ {
390
// oldHTMLEscape is the http package's old HTML escaping function in bytes
// form. It returns s with the five HTML-significant characters replaced by
// entities, one bytes.Replace pass per character:
//
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//	"  ->  &quot;
//	'  ->  &apos;
//
// The ampersand pass runs first so that the ampersands introduced by the
// later passes are not escaped a second time.
//
// NOTE(review): the entity spellings had been lost from this copy of the
// file (each replacement mapped a character to itself). &quot; and &apos;
// follow the Go strings package's version of this helper; confirm they match
// htmlEscaper's table.
func oldHTMLEscape(s []byte) []byte {
	s = bytes.Replace(s, []byte("&"), []byte("&amp;"), -1)
	s = bytes.Replace(s, []byte("<"), []byte("&lt;"), -1)
	s = bytes.Replace(s, []byte(">"), []byte("&gt;"), -1)
	s = bytes.Replace(s, []byte(`"`), []byte("&quot;"), -1)
	s = bytes.Replace(s, []byte("'"), []byte("&apos;"), -1)
	return s
}
400
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
//
// Each iteration converts a 200-byte string (100 "a" then 100 "b") to a
// fresh []byte and upper-cases it with two chained bytes.Replace calls, one
// per letter. The final result is discarded.
func BenchmarkByteByteReplaces(b *testing.B) {
	text := strings.Repeat("a", 100) + strings.Repeat("b", 100)
	for iter := 0; iter < b.N; iter++ {
		// First pass: 'a' -> 'A'. The string-to-bytes conversion stays
		// inside the loop so it is part of what is being measured.
		afterA := bytes.Replace([]byte(text), []byte{'a'}, []byte{'A'}, -1)
		// Second pass: 'b' -> 'B' on the output of the first.
		bytes.Replace(afterA, []byte{'b'}, []byte{'B'}, -1)
	}
}
408
// BenchmarkByteByteMap compares byteByteImpl against Map.
409
func BenchmarkByteByteMap(b *testing.B) {
410
str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
411
fn := func(r rune) rune {
420
for i := 0; i < b.N; i++ {
421
bytes.Map(fn, []byte(str))