2
Copyright 2015 The Camlistore Authors
4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
8
http://www.apache.org/licenses/LICENSE-2.0
10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
17
// Package bytereplacer provides a utility for replacing parts of byte slices.
22
// Replacer replaces a list of strings with replacements.
23
// It is safe for concurrent use by multiple goroutines.
24
type Replacer struct {
28
// replacer is the interface that a replacement algorithm needs to implement.
29
type replacer interface {
30
// Replace performs all replacements, in-place if possible.
31
Replace(s []byte) []byte
34
// New returns a new Replacer from a list of old, new string pairs.
35
// Replacements are performed in order, without overlapping matches.
36
func New(oldnew ...string) *Replacer {
37
if len(oldnew)%2 == 1 {
38
panic("bytes.NewReplacer: odd argument count")
42
for i := 0; i < len(oldnew); i += 2 {
43
if len(oldnew[i]) != 1 {
44
return &Replacer{r: makeGenericReplacer(oldnew)}
46
if len(oldnew[i+1]) != 1 {
56
// The first occurrence of old->new map takes precedence
57
// over the others with the same old string.
58
for i := len(oldnew) - 2; i >= 0; i -= 2 {
63
return &Replacer{r: &r}
66
return &Replacer{r: makeGenericReplacer(oldnew)}
69
// Replace performs all replacements in-place on s. If the capacity
70
// of s is not sufficient, a new slice is allocated, otherwise Replace
72
func (r *Replacer) Replace(s []byte) []byte {
76
type trieNode struct {
84
func (t *trieNode) add(key, val []byte, priority int, r *genericReplacer) {
93
if len(t.prefix) > 0 {
94
// Need to split the prefix among multiple nodes.
95
var n int // length of the longest common prefix
96
for ; n < len(t.prefix) && n < len(key); n++ {
97
if t.prefix[n] != key[n] {
101
if n == len(t.prefix) {
102
t.next.add(key[n:], val, priority, r)
104
// First byte differs, start a new lookup table here. Looking up
105
// what is currently t.prefix[0] will lead to prefixNode, and
106
// looking up key[0] will lead to keyNode.
107
var prefixNode *trieNode
108
if len(t.prefix) == 1 {
111
prefixNode = &trieNode{
112
prefix: t.prefix[1:],
116
keyNode := new(trieNode)
117
t.table = make([]*trieNode, r.tableSize)
118
t.table[r.mapping[t.prefix[0]]] = prefixNode
119
t.table[r.mapping[key[0]]] = keyNode
122
keyNode.add(key[1:], val, priority, r)
124
// Insert new node after the common section of the prefix.
126
prefix: t.prefix[n:],
129
t.prefix = t.prefix[:n]
131
next.add(key[n:], val, priority, r)
133
} else if t.table != nil {
134
// Insert into existing table.
135
m := r.mapping[key[0]]
136
if t.table[m] == nil {
137
t.table[m] = new(trieNode)
139
t.table[m].add(key[1:], val, priority, r)
142
t.next = new(trieNode)
143
t.next.add(nil, val, priority, r)
147
func (r *genericReplacer) lookup(s []byte, ignoreRoot bool) (val []byte, keylen int, found bool) {
148
// Iterate down the trie to the end, and grab the value and keylen with
149
// the highest priority.
154
if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
155
bestPriority = node.priority
164
if node.table != nil {
165
index := r.mapping[s[0]]
166
if int(index) == r.tableSize {
169
node = node.table[index]
172
} else if len(node.prefix) > 0 && bytes.HasPrefix(s, node.prefix) {
173
n += len(node.prefix)
174
s = s[len(node.prefix):]
183
// genericReplacer is the fully generic algorithm.
184
// It's used as a fallback when nothing faster can be used.
185
type genericReplacer struct {
187
// tableSize is the size of a trie node's lookup table. It is the number
188
// of unique key bytes.
190
// mapping maps from key bytes to a dense index for trieNode.table.
194
func makeGenericReplacer(oldnew []string) *genericReplacer {
195
r := new(genericReplacer)
196
// Find each byte used, then assign them each an index.
197
for i := 0; i < len(oldnew); i += 2 {
199
for j := 0; j < len(key); j++ {
200
r.mapping[key[j]] = 1
204
for _, b := range r.mapping {
205
r.tableSize += int(b)
209
for i, b := range r.mapping {
211
r.mapping[i] = byte(r.tableSize)
217
// Ensure root node uses a lookup table (for performance).
218
r.root.table = make([]*trieNode, r.tableSize)
220
for i := 0; i < len(oldnew); i += 2 {
221
r.root.add([]byte(oldnew[i]), []byte(oldnew[i+1]), len(oldnew)-i, r)
226
func (r *genericReplacer) Replace(s []byte) []byte {
228
var prevMatchEmpty bool
231
for i := 0; i <= len(s); {
232
// Fast path: s[i] is not a prefix of any pattern.
233
if i != len(s) && r.root.priority == 0 {
234
index := int(r.mapping[s[i]])
235
if index == r.tableSize || r.root.table[index] == nil {
241
// Ignore the empty match iff the previous loop found the empty match.
242
val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
243
prevMatchEmpty = match && keylen == 0
245
dst = append(dst, s[last:i]...)
246
if diff := len(val) - keylen; grown || diff < 0 {
247
dst = append(dst, val...)
249
} else if diff <= cap(s)-len(s) {
250
// The replacement is larger than the original, but can still fit in the original buffer.
251
copy(s[i+len(val):cap(dst)], s[i+keylen:])
252
dst = append(dst, val...)
256
// The output will grow larger than the original buffer. Allocate a new one.
258
newDst := make([]byte, len(dst), cap(dst)+diff)
262
dst = append(dst, val...)
271
dst = append(dst, s[last:]...)
276
// byteReplacer is the implementation that's used when all the "old"
277
// and "new" values are single ASCII bytes.
278
// The array contains replacement bytes indexed by old byte.
279
type byteReplacer [256]byte
281
func (r *byteReplacer) Replace(s []byte) []byte {
282
for i, b := range s {