1
// Copyright 2009 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
18
<?xml version="1.0" encoding="UTF-8"?>
19
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
20
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
21
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
23
<hello lang="en">World <>'" 白鵬翔</hello>
24
<query>&何; &is-it;</query>
26
<outer foo:attr="value" xmlns:tag="ns4">
30
<![CDATA[Some text here.]]>
32
</body><!-- missing final newline -->`
34
// testEntity supplies the replacement text for the two custom entity
// references, &何; and &is-it;, used in the <query> element of the test
// document; the token tables expect them to expand to "What is it?".
var testEntity = map[string]string{
	"何":     "What",
	"is-it": "is it?",
}
36
var rawTokens = []Token{
38
ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
40
Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
41
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
43
StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
45
StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
46
CharData("World <>'\" 白鵬翔"),
47
EndElement{Name{"", "hello"}},
49
StartElement{Name{"", "query"}, []Attr{}},
50
CharData("What is it?"),
51
EndElement{Name{"", "query"}},
53
StartElement{Name{"", "goodbye"}, []Attr{}},
54
EndElement{Name{"", "goodbye"}},
56
StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
58
StartElement{Name{"", "inner"}, []Attr{}},
59
EndElement{Name{"", "inner"}},
61
EndElement{Name{"", "outer"}},
63
StartElement{Name{"tag", "name"}, []Attr{}},
65
CharData("Some text here."),
67
EndElement{Name{"tag", "name"}},
69
EndElement{Name{"", "body"}},
70
Comment(" missing final newline "),
73
var cookedTokens = []Token{
75
ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
77
Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
78
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
80
StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
82
StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
83
CharData("World <>'\" 白鵬翔"),
84
EndElement{Name{"ns2", "hello"}},
86
StartElement{Name{"ns2", "query"}, []Attr{}},
87
CharData("What is it?"),
88
EndElement{Name{"ns2", "query"}},
90
StartElement{Name{"ns2", "goodbye"}, []Attr{}},
91
EndElement{Name{"ns2", "goodbye"}},
93
StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
95
StartElement{Name{"ns2", "inner"}, []Attr{}},
96
EndElement{Name{"ns2", "inner"}},
98
EndElement{Name{"ns2", "outer"}},
100
StartElement{Name{"ns3", "name"}, []Attr{}},
102
CharData("Some text here."),
104
EndElement{Name{"ns3", "name"}},
106
EndElement{Name{"ns2", "body"}},
107
Comment(" missing final newline "),
110
const testInputAltEncoding = `
111
<?xml version="1.0" encoding="x-testing-uppercase"?>
114
var rawTokensAltEncoding = []Token{
116
ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
118
StartElement{Name{"", "tag"}, []Attr{}},
120
EndElement{Name{"", "tag"}},
123
var xmlInput = []string{
124
// unexpected EOF cases
149
// other Syntax errors
154
// "<!0 >", // let the Token() caller handle
163
// "<![CDATA[d]]>", // let the Token() caller handle
170
func TestRawToken(t *testing.T) {
171
d := NewDecoder(strings.NewReader(testInput))
172
d.Entity = testEntity
173
testRawToken(t, d, testInput, rawTokens)
176
const nonStrictInput = `
177
<tag>non&entity</tag>
178
<tag>&unknown;entity</tag>
187
// nonStringEntity maps two entity names, the empty string and "0a", to the
// marker text "oops!".
// NOTE(review): presumably neither key can match a well-formed entity
// reference, so "oops!" showing up in decoded output would indicate a
// decoder bug; confirm against the test that installs this map.
var nonStringEntity = map[string]string{
	"":   "oops!",
	"0a": "oops!",
}
189
var nonStrictTokens = []Token{
191
StartElement{Name{"", "tag"}, []Attr{}},
192
CharData("non&entity"),
193
EndElement{Name{"", "tag"}},
195
StartElement{Name{"", "tag"}, []Attr{}},
196
CharData("&unknown;entity"),
197
EndElement{Name{"", "tag"}},
199
StartElement{Name{"", "tag"}, []Attr{}},
201
EndElement{Name{"", "tag"}},
203
StartElement{Name{"", "tag"}, []Attr{}},
205
EndElement{Name{"", "tag"}},
207
StartElement{Name{"", "tag"}, []Attr{}},
209
EndElement{Name{"", "tag"}},
211
StartElement{Name{"", "tag"}, []Attr{}},
213
EndElement{Name{"", "tag"}},
215
StartElement{Name{"", "tag"}, []Attr{}},
217
EndElement{Name{"", "tag"}},
219
StartElement{Name{"", "tag"}, []Attr{}},
221
EndElement{Name{"", "tag"}},
225
func TestNonStrictRawToken(t *testing.T) {
226
d := NewDecoder(strings.NewReader(nonStrictInput))
228
testRawToken(t, d, nonStrictInput, nonStrictTokens)
231
type downCaser struct {
236
func (d *downCaser) ReadByte() (c byte, err error) {
237
c, err = d.r.ReadByte()
238
if c >= 'A' && c <= 'Z' {
244
func (d *downCaser) Read(p []byte) (int, error) {
245
d.t.Fatalf("unexpected Read call on downCaser reader")
249
func TestRawTokenAltEncoding(t *testing.T) {
250
d := NewDecoder(strings.NewReader(testInputAltEncoding))
251
d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
252
if charset != "x-testing-uppercase" {
253
t.Fatalf("unexpected charset %q", charset)
255
return &downCaser{t, input.(io.ByteReader)}, nil
257
testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
260
func TestRawTokenAltEncodingNoConverter(t *testing.T) {
261
d := NewDecoder(strings.NewReader(testInputAltEncoding))
262
token, err := d.RawToken()
264
t.Fatalf("expected a token on first RawToken call")
269
token, err = d.RawToken()
271
t.Errorf("expected a nil token; got %#v", token)
274
t.Fatalf("expected an error on second RawToken call")
276
const encoding = "x-testing-uppercase"
277
if !strings.Contains(err.Error(), encoding) {
278
t.Errorf("expected error to contain %q; got error: %v",
283
func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
285
for i, want := range rawTokens {
286
start := d.InputOffset()
287
have, err := d.RawToken()
288
end := d.InputOffset()
290
t.Fatalf("token %d: unexpected error: %s", i, err)
292
if !reflect.DeepEqual(have, want) {
293
var shave, swant string
294
if _, ok := have.(CharData); ok {
295
shave = fmt.Sprintf("CharData(%q)", have)
297
shave = fmt.Sprintf("%#v", have)
299
if _, ok := want.(CharData); ok {
300
swant = fmt.Sprintf("CharData(%q)", want)
302
swant = fmt.Sprintf("%#v", want)
304
t.Errorf("token %d = %s, want %s", i, shave, swant)
307
// Check that the offsets reported by InputOffset bracket the token just returned.
309
case start < lastEnd:
310
t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
312
// Special case: EndElement can be synthesized.
313
if start == end && end == lastEnd {
316
t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
317
case end > int64(len(raw)):
318
t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
320
text := raw[start:end]
321
if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
322
t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
329
// Ensure that directives (specifically !DOCTYPE) include the complete
330
// text of any nested directives, noting that < and > do not change
331
// nesting depth if they are in single or double quotes.
333
var nestedDirectivesInput = `
334
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
335
<!DOCTYPE [<!ENTITY xlt ">">]>
336
<!DOCTYPE [<!ENTITY xlt "<">]>
337
<!DOCTYPE [<!ENTITY xlt '>'>]>
338
<!DOCTYPE [<!ENTITY xlt '<'>]>
339
<!DOCTYPE [<!ENTITY xlt '">'>]>
340
<!DOCTYPE [<!ENTITY xlt "'<">]>
343
var nestedDirectivesTokens = []Token{
345
Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
347
Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
349
Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
351
Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
353
Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
355
Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
357
Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
361
func TestNestedDirectives(t *testing.T) {
362
d := NewDecoder(strings.NewReader(nestedDirectivesInput))
364
for i, want := range nestedDirectivesTokens {
365
have, err := d.Token()
367
t.Fatalf("token %d: unexpected error: %s", i, err)
369
if !reflect.DeepEqual(have, want) {
370
t.Errorf("token %d = %#v want %#v", i, have, want)
375
func TestToken(t *testing.T) {
376
d := NewDecoder(strings.NewReader(testInput))
377
d.Entity = testEntity
379
for i, want := range cookedTokens {
380
have, err := d.Token()
382
t.Fatalf("token %d: unexpected error: %s", i, err)
384
if !reflect.DeepEqual(have, want) {
385
t.Errorf("token %d = %#v want %#v", i, have, want)
390
func TestSyntax(t *testing.T) {
391
for i := range xmlInput {
392
d := NewDecoder(strings.NewReader(xmlInput[i]))
394
for _, err = d.Token(); err == nil; _, err = d.Token() {
396
if _, ok := err.(*SyntaxError); !ok {
397
t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
402
type allScalars struct {
424
var all = allScalars{
448
const testScalarsInput = `<allscalars>
451
<False1>false</False1>
463
<Uintptr>11</Uintptr>
465
<Float32>13.0</Float32>
466
<Float64>14.0</Float64>
468
<PtrString>16</PtrString>
471
func TestAllScalars(t *testing.T) {
473
err := Unmarshal([]byte(testScalarsInput), &a)
478
if !reflect.DeepEqual(a, all) {
479
t.Errorf("have %+v want %+v", a, all)
487
func TestIssue569(t *testing.T) {
488
data := `<item><Field_a>abcd</Field_a></item>`
490
err := Unmarshal([]byte(data), &i)
492
if err != nil || i.Field_a != "abcd" {
493
t.Fatal("Expecting abcd")
497
func TestUnquotedAttrs(t *testing.T) {
498
data := "<tag attr=azAZ09:-_\t>"
499
d := NewDecoder(strings.NewReader(data))
501
token, err := d.Token()
502
if _, ok := err.(*SyntaxError); ok {
503
t.Errorf("Unexpected error: %v", err)
505
if token.(StartElement).Name.Local != "tag" {
506
t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
508
attr := token.(StartElement).Attr[0]
509
if attr.Value != "azAZ09:-_" {
510
t.Errorf("Unexpected attribute value: %v", attr.Value)
512
if attr.Name.Local != "attr" {
513
t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
517
func TestValuelessAttrs(t *testing.T) {
518
tests := [][3]string{
519
{"<p nowrap>", "p", "nowrap"},
520
{"<p nowrap >", "p", "nowrap"},
521
{"<input checked/>", "input", "checked"},
522
{"<input checked />", "input", "checked"},
524
for _, test := range tests {
525
d := NewDecoder(strings.NewReader(test[0]))
527
token, err := d.Token()
528
if _, ok := err.(*SyntaxError); ok {
529
t.Errorf("Unexpected error: %v", err)
531
if token.(StartElement).Name.Local != test[1] {
532
t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
534
attr := token.(StartElement).Attr[0]
535
if attr.Value != test[2] {
536
t.Errorf("Unexpected attribute value: %v", attr.Value)
538
if attr.Name.Local != test[2] {
539
t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
544
func TestCopyTokenCharData(t *testing.T) {
545
data := []byte("same data")
546
var tok1 Token = CharData(data)
547
tok2 := CopyToken(tok1)
548
if !reflect.DeepEqual(tok1, tok2) {
549
t.Error("CopyToken(CharData) != CharData")
552
if reflect.DeepEqual(tok1, tok2) {
553
t.Error("CopyToken(CharData) uses same buffer.")
557
func TestCopyTokenStartElement(t *testing.T) {
558
elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
560
tok2 := CopyToken(tok1)
561
if tok1.(StartElement).Attr[0].Value != "en" {
562
t.Error("CopyToken overwrote Attr[0]")
564
if !reflect.DeepEqual(tok1, tok2) {
565
t.Error("CopyToken(StartElement) != StartElement")
567
tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
568
if reflect.DeepEqual(tok1, tok2) {
569
t.Error("CopyToken(CharData) uses same buffer.")
573
func TestSyntaxErrorLineNum(t *testing.T) {
574
testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
575
d := NewDecoder(strings.NewReader(testInput))
577
for _, err = d.Token(); err == nil; _, err = d.Token() {
579
synerr, ok := err.(*SyntaxError)
581
t.Error("Expected SyntaxError.")
583
if synerr.Line != 3 {
584
t.Error("SyntaxError didn't have correct line number.")
588
func TestTrailingRawToken(t *testing.T) {
589
input := `<FOO></FOO> `
590
d := NewDecoder(strings.NewReader(input))
592
for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
595
t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
599
func TestTrailingToken(t *testing.T) {
600
input := `<FOO></FOO> `
601
d := NewDecoder(strings.NewReader(input))
603
for _, err = d.Token(); err == nil; _, err = d.Token() {
606
t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
610
func TestEntityInsideCDATA(t *testing.T) {
611
input := `<test><![CDATA[ &val=foo ]]></test>`
612
d := NewDecoder(strings.NewReader(input))
614
for _, err = d.Token(); err == nil; _, err = d.Token() {
617
t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
621
var characterTests = []struct {
625
{"\x12<doc/>", "illegal character code U+0012"},
626
{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
627
{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
628
{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
629
{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
630
{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
631
{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
632
{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
633
{"<doc>&hello;</doc>", "invalid character entity &hello;"},
636
func TestDisallowedCharacters(t *testing.T) {
638
for i, tt := range characterTests {
639
d := NewDecoder(strings.NewReader(tt.in))
645
synerr, ok := err.(*SyntaxError)
647
t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
649
if synerr.Msg != tt.err {
650
t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
655
type procInstEncodingTest struct {
659
var procInstTests = []struct {
662
{`version="1.0" encoding="utf-8"`, "utf-8"},
663
{`version="1.0" encoding='utf-8'`, "utf-8"},
664
{`version="1.0" encoding='utf-8' `, "utf-8"},
665
{`version="1.0" encoding=utf-8`, ""},
666
{`encoding="FOO" `, "FOO"},
669
func TestProcInstEncoding(t *testing.T) {
670
for _, test := range procInstTests {
671
got := procInstEncoding(test.input)
672
if got != test.expect {
673
t.Errorf("procInstEncoding(%q) = %q; want %q", test.input, got, test.expect)
678
// Ensure that directives with comments include the complete
679
// text of any nested directives.
681
var directivesWithCommentsInput = `
682
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
683
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
684
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
687
var directivesWithCommentsTokens = []Token{
689
Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
691
Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
693
Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang">]`),
697
func TestDirectivesWithComments(t *testing.T) {
698
d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
700
for i, want := range directivesWithCommentsTokens {
701
have, err := d.Token()
703
t.Fatalf("token %d: unexpected error: %s", i, err)
705
if !reflect.DeepEqual(have, want) {
706
t.Errorf("token %d = %#v want %#v", i, have, want)
711
// errWriter is an io.Writer whose Write method always returns an error.
712
type errWriter struct{}
714
// Write never accepts any data: it ignores p and reports zero bytes
// written together with an error whose message is "unwritable".
func (errWriter) Write(p []byte) (n int, err error) {
	err = fmt.Errorf("unwritable")
	return 0, err
}
716
func TestEscapeTextIOErrors(t *testing.T) {
717
expectErr := "unwritable"
718
err := EscapeText(errWriter{}, []byte{'A'})
720
if err == nil || err.Error() != expectErr {
721
t.Errorf("have %v, want %v", err, expectErr)
725
func TestEscapeTextInvalidChar(t *testing.T) {
726
input := []byte("A \x00 terminated string.")
727
expected := "A \uFFFD terminated string."
729
buff := new(bytes.Buffer)
730
if err := EscapeText(buff, input); err != nil {
731
t.Fatalf("have %v, want nil", err)
733
text := buff.String()
735
if text != expected {
736
t.Errorf("have %v, want %v", text, expected)
740
func TestIssue5880(t *testing.T) {
742
data, err := Marshal(T{192, 168, 0, 1})
744
t.Errorf("Marshal error: %v", err)
746
if !utf8.Valid(data) {
747
t.Errorf("Marshal generated invalid UTF-8: %x", data)