1
// Copyright 2013 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
19
// lowerCaseASCII is a Transformer used by the tests. TestString expects
// String(lowerCaseASCII{}, s) to equal strings.ToLower(s) for its ASCII inputs.
type lowerCaseASCII struct{}
21
func (lowerCaseASCII) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
24
n, err = len(dst), ErrShortDst
26
for i, c := range src[:n] {
27
if 'A' <= c && c <= 'Z' {
35
// errYouMentionedX is the error returned by dontMentionX.Transform; test cases
// expect it as their wantErr.
var errYouMentionedX = errors.New("you mentioned X")
37
// dontMentionX is a Transformer used by the tests whose Transform method can
// fail with errYouMentionedX.
type dontMentionX struct{}
39
func (dontMentionX) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
42
n, err = len(dst), ErrShortDst
44
for i, c := range src[:n] {
46
return i, i, errYouMentionedX
53
// doublerAtEOF is a strange Transformer that transforms "this" to "tthhiiss",
54
// but only if atEOF is true.
55
type doublerAtEOF struct{}
57
func (doublerAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
59
return 0, 0, ErrShortSrc
61
for i, c := range src {
62
if 2*i+2 >= len(dst) {
63
return 2 * i, i, ErrShortDst
68
return 2 * len(src), len(src), nil
71
// rleDecode and rleEncode implement a toy run-length encoding: "aabbbbbbbbbb"
72
// is encoded as "2a10b". The decoded text is assumed to not contain any digits.
74
// rleDecode is the decoding half of the toy run-length encoding. Its Transform
// method reports malformed input with a "rleDecode: bad input" error.
type rleDecode struct{}
76
func (rleDecode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
80
for i, c := range src {
81
if '0' <= c && c <= '9' {
86
return nDst, nSrc, errors.New("rleDecode: bad input")
89
return nDst, nSrc, ErrShortDst
91
for j := 0; j < n; j++ {
94
dst, src = dst[n:], src[i+1:]
95
nDst, nSrc = nDst+n, nSrc+i+1
99
return nDst, nSrc, errors.New("rleDecode: bad input")
101
return nDst, nSrc, ErrShortSrc
103
return nDst, nSrc, nil
106
type rleEncode struct {
107
// allowStutter means that "xxxxxxxx" can be encoded as "5x3x"
108
// instead of always as "8x".
112
func (e rleEncode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
114
n, c0 := len(src), src[0]
115
for i, c := range src[1:] {
121
if n == len(src) && !atEOF && !e.allowStutter {
122
return nDst, nSrc, ErrShortSrc
125
if len(s) >= len(dst) {
126
return nDst, nSrc, ErrShortDst
130
dst, src = dst[len(s)+1:], src[n:]
131
nDst, nSrc = nDst+len(s)+1, nSrc+n
133
return nDst, nSrc, nil
136
// trickler consumes all input bytes, but writes a single byte at a time to dst.
139
func (t *trickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
140
*t = append(*t, src...)
145
return 0, len(src), ErrShortDst
152
return 1, len(src), err
155
// delayedTrickler is like trickler, but delays writing output to dst. This is
156
// highly unlikely to be relevant in practice, but it seems like a good idea
157
// to have some tolerance as long as progress can be detected.
158
type delayedTrickler []byte
160
func (t *delayedTrickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
161
if len(*t) > 0 && len(dst) > 0 {
166
*t = append(*t, src...)
170
return nDst, len(src), err
173
type testCase struct {
182
wantIter int // number of iterations taken; 0 means we don't care.
185
func (t testCase) String() string {
186
return tstr(t.t) + "; " + t.desc
189
func tstr(t Transformer) string {
190
if stringer, ok := t.(fmt.Stringer); ok {
191
return stringer.String()
193
s := fmt.Sprintf("%T", t)
194
return s[1+strings.Index(s, "."):]
197
func (c chain) String() string {
198
buf := &bytes.Buffer{}
199
buf.WriteString("Chain(")
200
for i, l := range c.link[:len(c.link)-1] {
202
fmt.Fprint(buf, ", ")
204
buf.WriteString(tstr(l.t))
210
var testCases = []testCase{
226
wantStr: "hello world.",
235
wantStr: "hello world.",
244
wantStr: "hello world.",
248
desc: "small buffers",
253
wantStr: "hello world.",
257
desc: "very small buffers",
262
wantStr: "hello world.",
268
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
271
wantStr: "The First Rule of Transform Club: don't mention Mister ",
272
wantErr: errYouMentionedX,
276
desc: "small buffers",
278
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
281
wantStr: "The First Rule of Transform Club: don't mention Mister ",
282
wantErr: errYouMentionedX,
286
desc: "very small buffers",
288
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
291
wantStr: "The First Rule of Transform Club: don't mention Mister ",
292
wantErr: errYouMentionedX,
296
desc: "only transform at EOF",
307
src: "1a2b3c10d11e0f1g",
310
wantStr: "abbcccddddddddddeeeeeeeeeeeg",
316
src: "12a23b34c45d56e99z",
319
wantStr: strings.Repeat("a", 12) +
320
strings.Repeat("b", 23) +
321
strings.Repeat("c", 34) +
322
strings.Repeat("d", 45) +
323
strings.Repeat("e", 56) +
324
strings.Repeat("z", 99),
328
desc: "tight buffers",
330
src: "1a2b3c10d11e0f1g",
333
wantStr: "abbcccddddddddddeeeeeeeeeeeg",
339
src: "1a2b3c10d11e0f1g",
342
wantStr: "abbcccdddddddddd",
343
wantErr: ErrShortDst,
349
src: "1a2b3c10d11e0f1g",
354
wantErr: ErrShortSrc,
360
src: "abbcccddddddddddeeeeeeeeeeeg",
363
wantStr: "1a2b3c10d11e1g",
369
src: strings.Repeat("a", 12) +
370
strings.Repeat("b", 23) +
371
strings.Repeat("c", 34) +
372
strings.Repeat("d", 45) +
373
strings.Repeat("e", 56) +
374
strings.Repeat("z", 99),
377
wantStr: "12a23b34c45d56e99z",
381
desc: "tight buffers",
383
src: "abbcccddddddddddeeeeeeeeeeeg",
386
wantStr: "1a2b3c10d11e1g",
392
src: "abbcccddddddddddeeeeeeeeeeeg",
396
wantErr: ErrShortDst,
402
src: "abbcccddddddddddeeeeeeeeeeeg",
406
wantStr: "1a2b3c10d",
407
wantErr: ErrShortSrc,
411
desc: "allowStutter = false",
412
t: rleEncode{allowStutter: false},
413
src: "aaaabbbbbbbbccccddddd",
420
desc: "allowStutter = true",
421
t: rleEncode{allowStutter: true},
422
src: "aaaabbbbbbbbccccddddd",
426
wantStr: "4a6b2b4c4d1d",
432
src: "abcdefghijklm",
435
wantStr: "abcdefghijklm",
439
desc: "delayedTrickler",
440
t: &delayedTrickler{},
441
src: "abcdefghijklm",
444
wantStr: "abcdefghijklm",
448
func TestReader(t *testing.T) {
449
for _, tc := range testCases {
451
r := NewReader(strings.NewReader(tc.src), tc.t)
452
// Differently sized dst and src buffers are not part of the
453
// exported API. We override them manually.
454
r.dst = make([]byte, tc.dstSize)
455
r.src = make([]byte, tc.srcSize)
456
got, err := ioutil.ReadAll(r)
458
if str != tc.wantStr || err != tc.wantErr {
459
t.Errorf("%s:\ngot %q, %v\nwant %q, %v", tc, str, err, tc.wantStr, tc.wantErr)
464
func reset(t Transformer) {
466
for err := ErrShortDst; err != nil; {
467
_, _, err = t.Transform(dst[:], nil, true)
471
func TestWriter(t *testing.T) {
472
tests := append(testCases, chainTests()...)
473
for _, tc := range tests {
474
sizes := []int{1, 2, 3, 4, 5, 10, 100, 1000}
476
sizes = []int{tc.ioSize}
478
for _, sz := range sizes {
479
bb := &bytes.Buffer{}
481
w := NewWriter(bb, tc.t)
482
// Differently sized dst and src buffers are not part of the
483
// exported API. We override them manually.
484
w.dst = make([]byte, tc.dstSize)
485
w.src = make([]byte, tc.srcSize)
486
src := make([]byte, sz)
488
for b := tc.src; len(b) > 0 && err == nil; {
492
m, err = w.Write(src[:n])
493
if m != n && err == nil {
494
t.Errorf("%s:%d: did not consume all bytes %d < %d", tc, sz, m, n)
501
if str != tc.wantStr || err != tc.wantErr {
502
t.Errorf("%s:%d:\ngot %q, %v\nwant %q, %v", tc, sz, str, err, tc.wantStr, tc.wantErr)
508
func TestNop(t *testing.T) {
509
testCases := []struct {
516
{"a", 0, ErrShortDst},
520
for i, tc := range testCases {
521
dst := make([]byte, tc.dstSize)
522
nDst, nSrc, err := Nop.Transform(dst, []byte(tc.str), true)
524
if tc.dstSize < len(want) {
525
want = want[:tc.dstSize]
527
if got := string(dst[:nDst]); got != want || err != tc.err || nSrc != nDst {
528
t.Errorf("%d:\ngot %q, %d, %v\nwant %q, %d, %v", i, got, nSrc, err, want, nDst, tc.err)
533
func TestDiscard(t *testing.T) {
534
testCases := []struct {
543
for i, tc := range testCases {
544
nDst, nSrc, err := Discard.Transform(make([]byte, tc.dstSize), []byte(tc.str), true)
545
if nDst != 0 || nSrc != len(tc.str) || err != nil {
546
t.Errorf("%d:\ngot %q, %d, %v\nwant 0, %d, nil", i, nDst, nSrc, err, len(tc.str))
551
// mkChain creates a Chain transformer. x must alternate between a Transformer
552
// and bufSize, like T, (sz, T)*
553
func mkChain(x ...interface{}) *chain {
555
for i := 0; i < len(x); i += 2 {
556
t = append(t, x[i].(Transformer))
558
c := Chain(t...).(*chain)
559
for i, j := 1, 1; i < len(x); i, j = i+2, j+1 {
560
c.link[j].b = make([]byte, x[i].(int))
565
func chainTests() []testCase {
569
t: mkChain(rleEncode{}, 100, lowerCaseASCII{}),
579
desc: "short dst buffer",
580
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}),
581
src: "1a2b3c10d11e0f1g",
584
wantStr: "abbcccdddddddddd",
585
wantErr: ErrShortDst,
589
desc: "short internal dst buffer",
590
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop),
591
src: "1a2b3c10d11e0f1g",
594
wantStr: "abbcccdddddddddd",
595
wantErr: errShortInternal,
599
desc: "short internal dst buffer from input",
600
t: mkChain(rleDecode{}, 10, Nop),
601
src: "1a2b3c10d11e0f1g",
604
wantStr: "abbcccdddddddddd",
605
wantErr: errShortInternal,
609
desc: "empty short internal dst buffer",
610
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop),
614
wantStr: "aaaabbbbbbb",
615
wantErr: errShortInternal,
619
desc: "empty short internal dst buffer from input",
620
t: mkChain(rleDecode{}, 10, Nop),
624
wantStr: "aaaabbbbbbb",
625
wantErr: errShortInternal,
629
desc: "short internal src buffer after full dst buffer",
630
t: mkChain(Nop, 5, rleEncode{}, 10, Nop),
635
wantErr: errShortInternal,
640
desc: "short internal src buffer after short dst buffer; test lastFull",
641
t: mkChain(rleDecode{}, 5, rleEncode{}, 4, Nop),
646
wantErr: errShortInternal,
650
desc: "short internal src buffer after successful complete fill",
651
t: mkChain(Nop, 3, rleDecode{}),
656
wantErr: errShortInternal,
661
desc: "short internal src buffer after short dst buffer; test lastFull",
662
t: mkChain(rleDecode{}, 5, rleEncode{}),
667
wantErr: errShortInternal,
671
desc: "short src buffer",
672
t: mkChain(rleEncode{}, 5, Nop),
673
src: "abbcccddddeeeee",
678
wantErr: ErrShortSrc,
682
desc: "process all in one go",
683
t: mkChain(rleEncode{}, 5, Nop),
684
src: "abbcccddddeeeeeffffff",
687
wantStr: "1a2b3c4d5e6f",
693
desc: "complete processing downstream after error",
694
t: mkChain(dontMentionX{}, 2, rleDecode{}, 5, Nop),
699
wantStr: "aaabbbbeeeee",
700
wantErr: errYouMentionedX,
704
desc: "return downstream fatal errors first (followed by short dst)",
705
t: mkChain(dontMentionX{}, 8, rleDecode{}, 4, Nop),
711
wantErr: errShortInternal,
715
desc: "return downstream fatal errors first (followed by short src)",
716
t: mkChain(dontMentionX{}, 5, Nop, 1, rleDecode{}),
722
wantErr: errShortInternal,
726
desc: "short internal",
727
t: mkChain(Nop, 11, rleEncode{}, 3, Nop),
728
src: "abbcccddddddddddeeeeeeeeeeeg",
731
wantStr: "1a2b3c10d",
732
wantErr: errShortInternal,
737
func doTransform(tc testCase) (res string, iter int, err error) {
739
dst := make([]byte, tc.dstSize)
740
out, in := make([]byte, 0, 2*len(tc.src)), []byte(tc.src)
743
src, atEOF := in, true
744
if len(src) > tc.srcSize {
745
src, atEOF = src[:tc.srcSize], false
747
nDst, nSrc, err := tc.t.Transform(dst, src, atEOF)
748
out = append(out, dst[:nDst]...)
751
case err == nil && len(in) != 0:
752
case err == ErrShortSrc && nSrc > 0:
753
case err == ErrShortDst && (nDst > 0 || nSrc > 0):
755
return string(out), iter, err
760
func TestChain(t *testing.T) {
761
if c, ok := Chain().(nop); !ok {
762
t.Errorf("empty chain: %v; want Nop", c)
765
// Test Chain for a single Transformer.
766
for _, tc := range testCases {
768
str, _, err := doTransform(tc)
769
if str != tc.wantStr || err != tc.wantErr {
770
t.Errorf("%s:\ngot %q, %v\nwant %q, %v", tc, str, err, tc.wantStr, tc.wantErr)
774
tests := chainTests()
775
sizes := []int{1, 2, 3, 4, 5, 7, 10, 100, 1000}
776
addTest := func(tc testCase, t *chain) {
777
if t.link[0].t != tc.t && tc.wantErr == ErrShortSrc {
778
tc.wantErr = errShortInternal
780
if t.link[len(t.link)-2].t != tc.t && tc.wantErr == ErrShortDst {
781
tc.wantErr = errShortInternal
784
tests = append(tests, tc)
786
for _, tc := range testCases {
787
for _, sz := range sizes {
790
addTest(tt, mkChain(tc.t, tc.dstSize, Nop))
791
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 2, Nop))
792
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop))
793
if sz >= tc.dstSize && (tc.wantErr != ErrShortDst || sz == tc.dstSize) {
794
addTest(tt, mkChain(Nop, tc.srcSize, tc.t))
795
addTest(tt, mkChain(Nop, 100, Nop, tc.srcSize, tc.t))
799
for _, tc := range testCases {
803
addTest(tt, mkChain(tc.t, tc.dstSize, Discard))
804
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Discard))
805
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, tc.dstSize, Discard))
807
for _, tc := range testCases {
810
tt.wantStr = strings.Replace(tc.src, "0f", "", -1)
811
// Chain encoders and decoders.
812
if _, ok := tc.t.(rleEncode); ok && tc.wantErr == nil {
813
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 1000, rleDecode{}))
814
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, tc.dstSize, rleDecode{}))
815
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}))
816
// decoding needs larger destinations
817
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, rleDecode{}, 100, Nop))
818
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}, 100, Nop))
819
} else if _, ok := tc.t.(rleDecode); ok && tc.wantErr == nil {
820
// The internal buffer size may need to be the sum of the maximum segment
821
// size of the two encoders!
822
addTest(tt, mkChain(tc.t, 2*tc.dstSize, rleEncode{}))
823
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 101, rleEncode{}))
824
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleEncode{}))
825
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 200, rleEncode{}, 100, Nop))
828
for _, tc := range tests {
829
str, iter, err := doTransform(tc)
830
mi := tc.wantIter != 0 && tc.wantIter != iter
831
if str != tc.wantStr || err != tc.wantErr || mi {
832
t.Errorf("%s:\ngot iter:%d, %q, %v\nwant iter:%d, %q, %v", tc, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr)
838
func TestRemoveFunc(t *testing.T) {
839
filter := RemoveFunc(func(r rune) bool {
840
return strings.IndexRune("ab\u0300\u1234,", r) != -1
864
src: ",до,свидания,",
865
wantStr: "досвидания",
869
src: "a\xbd\xb2=\xbc ⌘",
870
wantStr: "\uFFFD\uFFFD=\uFFFD ⌘",
874
// If we didn't replace illegal bytes with RuneError, the result
875
// would be \u0300 or the code would need to be more complex.
876
src: "\xcc\u0300\x80",
877
wantStr: "\uFFFD\uFFFD",
881
src: "\xcc\u0300\x80",
883
wantStr: "\uFFFD\uFFFD",
888
// Test a long buffer greater than the internal buffer size
889
src: "hello\xcc\xcc\xccworld",
891
wantStr: "hello\uFFFD\uFFFD\uFFFDworld",
899
wantErr: ErrShortDst,
906
wantErr: ErrShortDst,
914
wantErr: ErrShortSrc,
918
t: RemoveFunc(func(r rune) bool {
919
return r == utf8.RuneError
921
src: "\xcc\u0300\x80",
926
for _, tc := range tests {
937
str, iter, err := doTransform(tc)
938
mi := tc.wantIter != 0 && tc.wantIter != iter
939
if str != tc.wantStr || err != tc.wantErr || mi {
940
t.Errorf("%+q:\ngot iter:%d, %+q, %v\nwant iter:%d, %+q, %v", tc.src, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr)
944
idem, _, _ := doTransform(tc)
946
t.Errorf("%+q: found %+q; want %+q", tc.src, idem, str)
951
func testString(t *testing.T, f func(Transformer, string) (string, int, error)) {
952
for _, tt := range append(testCases, chainTests()...) {
953
if tt.desc == "allowStutter = true" {
954
// We don't have control over the buffer size, so we eliminate tests
955
// that depend on a specific buffer size being set.
959
if tt.wantErr == ErrShortDst || tt.wantErr == ErrShortSrc {
960
// The result string will be different.
963
got, n, err := f(tt.t, tt.src)
964
if tt.wantErr != err {
965
t.Errorf("%s:error: got %v; want %v", tt.desc, err, tt.wantErr)
967
if got, want := err == nil, n == len(tt.src); got != want {
968
t.Errorf("%s:n: got %v; want %v", tt.desc, got, want)
970
if got != tt.wantStr {
971
t.Errorf("%s:string: got %q; want %q", tt.desc, got, tt.wantStr)
976
func TestBytes(t *testing.T) {
977
testString(t, func(z Transformer, s string) (string, int, error) {
978
b, n, err := Bytes(z, []byte(s))
979
return string(b), n, err
983
func TestString(t *testing.T) {
984
testString(t, String)
986
// Overrun the internal destination buffer.
987
for i, s := range []string{
988
strings.Repeat("a", initialBufSize-1),
989
strings.Repeat("a", initialBufSize+0),
990
strings.Repeat("a", initialBufSize+1),
991
strings.Repeat("A", initialBufSize-1),
992
strings.Repeat("A", initialBufSize+0),
993
strings.Repeat("A", initialBufSize+1),
994
strings.Repeat("A", 2*initialBufSize-1),
995
strings.Repeat("A", 2*initialBufSize+0),
996
strings.Repeat("A", 2*initialBufSize+1),
997
strings.Repeat("a", initialBufSize-2) + "A",
998
strings.Repeat("a", initialBufSize-1) + "A",
999
strings.Repeat("a", initialBufSize+0) + "A",
1000
strings.Repeat("a", initialBufSize+1) + "A",
1002
got, _, _ := String(lowerCaseASCII{}, s)
1003
if want := strings.ToLower(s); got != want {
1004
t.Errorf("%d:dst buffer test: got %s (%d); want %s (%d)", i, got, len(got), want, len(want))
1008
// Overrun the internal source buffer.
1009
for i, s := range []string{
1010
strings.Repeat("a", initialBufSize-1),
1011
strings.Repeat("a", initialBufSize+0),
1012
strings.Repeat("a", initialBufSize+1),
1013
strings.Repeat("a", 2*initialBufSize+1),
1014
strings.Repeat("a", 2*initialBufSize+0),
1015
strings.Repeat("a", 2*initialBufSize+1),
1017
got, _, _ := String(rleEncode{}, s)
1018
if want := fmt.Sprintf("%da", len(s)); got != want {
1019
t.Errorf("%d:src buffer test: got %s (%d); want %s (%d)", i, got, len(got), want, len(want))
1023
// Test allocations for non-changing strings.
1024
// Note we still need to allocate a single buffer.
1025
for i, s := range []string{
1029
strings.Repeat("a", initialBufSize),
1030
strings.Repeat("a", 10*initialBufSize),
1032
if n := testing.AllocsPerRun(5, func() { String(&lowerCaseASCII{}, s) }); n > 1 {
1033
t.Errorf("%d: #allocs was %f; want 1", i, n)
1038
// TestBytesAllocation tests that buffer growth stays limited with the trickler
1039
// transformer, which behaves oddly but within spec. In case buffer growth is
1040
// not correctly handled, the test will either panic with a failed allocation or
1041
// thrash. To ensure the test terminates in the latter case, we time out
1042
// after some sufficiently long period of time.
1043
func TestBytesAllocation(t *testing.T) {
1044
done := make(chan bool)
1046
in := bytes.Repeat([]byte{'a'}, 1000)
1047
tr := trickler(make([]byte, 1))
1053
case <-time.After(3 * time.Second):
1054
t.Error("time out, likely due to excessive allocation")
1058
// TestStringAllocation tests that buffer growth stays limited with the trickler
1059
// transformer, which behaves oddly but within spec. In case buffer growth is
1060
// not correctly handled, the test will either panic with a failed allocation or
1061
// thrash. To ensure the test terminates in the latter case, we time out
1062
// after some sufficiently long period of time.
1063
func TestStringAllocation(t *testing.T) {
1064
done := make(chan bool)
1066
in := strings.Repeat("a", 1000)
1067
tr := trickler(make([]byte, 1))
1073
case <-time.After(3 * time.Second):
1074
t.Error("time out, likely due to excessive allocation")
1078
func BenchmarkStringLower(b *testing.B) {
1079
in := strings.Repeat("a", 4096)
1080
for i := 0; i < b.N; i++ {
1081
String(&lowerCaseASCII{}, in)