5
// Atsushi Enomoto <atsushi@ximian.com>
7
// Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9
// Permission is hereby granted, free of charge, to any person obtaining
10
// a copy of this software and associated documentation files (the
11
// "Software"), to deal in the Software without restriction, including
12
// without limitation the rights to use, copy, modify, merge, publish,
13
// distribute, sublicense, and/or sell copies of the Software, and to
14
// permit persons to whom the Software is furnished to do so, subject to
15
// the following conditions:
17
// The above copyright notice and this permission notice shall be
18
// included in all copies or substantial portions of the Software.
20
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30
// small xml parser that is mostly compatible with
34
using System.Collections;
35
using System.Globalization;
41
internal sealed class DefaultHandler : SmallXmlParser.IContentHandler
43
public void OnStartParsing (SmallXmlParser parser)
47
public void OnEndParsing (SmallXmlParser parser)
51
public void OnStartElement (string name, SmallXmlParser.IAttrList attrs)
55
public void OnEndElement (string name)
59
public void OnChars (string s)
63
public void OnIgnorableWhitespace (string s)
67
public void OnProcessingInstruction (string name, string text)
72
internal class SmallXmlParser
74
public interface IContentHandler
76
void OnStartParsing (SmallXmlParser parser);
77
void OnEndParsing (SmallXmlParser parser);
78
void OnStartElement (string name, IAttrList attrs);
79
void OnEndElement (string name);
80
void OnProcessingInstruction (string name, string text);
81
void OnChars (string text);
82
void OnIgnorableWhitespace (string text);
85
public interface IAttrList
89
string GetName (int i);
90
string GetValue (int i);
91
string GetValue (string name);
92
string [] Names { get; }
93
string [] Values { get; }
96
sealed class AttrListImpl : IAttrList
99
get { return attrNames.Count; }
101
public bool IsEmpty {
102
get { return attrNames.Count == 0; }
104
public string GetName (int i)
106
return (string) attrNames [i];
108
public string GetValue (int i)
110
return (string) attrValues [i];
112
public string GetValue (string name)
114
for (int i = 0; i < attrNames.Count; i++)
115
if ((string) attrNames [i] == name)
116
return (string) attrValues [i];
119
public string [] Names {
120
get { return (string []) attrNames.ToArray (typeof (string)); }
122
public string [] Values {
123
get { return (string []) attrValues.ToArray (typeof (string)); }
126
ArrayList attrNames = new ArrayList ();
127
ArrayList attrValues = new ArrayList ();
129
internal void Clear ()
135
internal void Add (string name, string value)
137
attrNames.Add (name);
138
attrValues.Add (value);
142
IContentHandler handler;
144
Stack elementNames = new Stack ();
145
Stack xmlSpaces = new Stack ();
147
StringBuilder buffer = new StringBuilder (200);
148
char [] nameBuffer = new char [30];
151
AttrListImpl attributes = new AttrListImpl ();
152
int line = 1, column;
155
public SmallXmlParser ()
159
private Exception Error (string msg)
161
return new SmallXmlParserException (msg, line, column);
164
// Builds (but does not throw) the exception used when the input ends
// while more content is required. The message lists the names currently
// held on the element stack, joined with commas, and is routed through
// Error () so that the current line/column are attached.
private Exception UnexpectedEndError ()
166
string [] arr = new string [elementNames.Count];
167
// COMPACT FRAMEWORK NOTE: CopyTo is not visible through the Stack class,
// so the stack is accessed through its ICollection interface instead.
168
(elementNames as ICollection).CopyTo (arr, 0);
169
return Error (String.Format (
170
"Unexpected end of stream. Element stack content is {0}", String.Join (",", arr)));
174
private bool IsNameChar (char c, bool start)
184
if (c > 0x100) { // optional condition for optimization
191
if ('\u02BB' <= c && c <= '\u02C1')
194
switch (Char.GetUnicodeCategory (c)) {
195
case UnicodeCategory.LowercaseLetter:
196
case UnicodeCategory.UppercaseLetter:
197
case UnicodeCategory.OtherLetter:
198
case UnicodeCategory.TitlecaseLetter:
199
case UnicodeCategory.LetterNumber:
201
case UnicodeCategory.SpacingCombiningMark:
202
case UnicodeCategory.EnclosingMark:
203
case UnicodeCategory.NonSpacingMark:
204
case UnicodeCategory.ModifierLetter:
205
case UnicodeCategory.DecimalDigitNumber:
212
private bool IsWhitespace (int c)
226
public void SkipWhitespaces ()
228
SkipWhitespaces (false);
231
private void HandleWhitespaces ()
233
while (IsWhitespace (Peek ()))
234
buffer.Append ((char) Read ());
235
if (Peek () != '<' && Peek () >= 0)
236
isWhitespace = false;
239
public void SkipWhitespaces (bool expected)
253
throw Error ("Whitespace is expected.");
261
return reader.Peek ();
266
int i = reader.Read ();
279
public void Expect (int c)
283
throw UnexpectedEndError ();
285
throw Error (String.Format ("Expected '{0}' but got {1}", (char) c, (char) p));
288
private string ReadUntil (char until, bool handleReferences)
292
throw UnexpectedEndError ();
293
char c = (char) Read ();
296
else if (handleReferences && c == '&')
301
string ret = buffer.ToString ();
306
public string ReadName ()
309
if (Peek () < 0 || !IsNameChar ((char) Peek (), true))
310
throw Error ("XML name start character is expected.");
311
for (int i = Peek (); i >= 0; i = Peek ()) {
313
if (!IsNameChar (c, false))
315
if (idx == nameBuffer.Length) {
316
char [] tmp = new char [idx * 2];
317
// COMPACT FRAMEWORK NOTE: Array.Copy(sourceArray, destinationArray, count) is not available.
318
Array.Copy (nameBuffer, 0, tmp, 0, idx);
321
nameBuffer [idx++] = c;
325
throw Error ("Valid XML name is expected.");
326
return new string (nameBuffer, 0, idx);
330
public void Parse (TextReader input, IContentHandler handler)
333
this.handler = handler;
335
handler.OnStartParsing (this);
339
HandleBufferedContent ();
340
if (elementNames.Count > 0)
341
throw Error (String.Format ("Insufficient close tag: {0}", elementNames.Peek ()));
343
handler.OnEndParsing (this);
348
private void Cleanup ()
355
elementNames = new Stack ();
356
xmlSpaces = new Stack ();
358
elementNames.Clear ();
364
isWhitespace = false;
367
public void ReadContent ()
370
if (IsWhitespace (Peek ())) {
371
if (buffer.Length == 0)
373
HandleWhitespaces ();
375
if (Peek () == '<') {
378
case '!': // declarations
380
if (Peek () == '[') {
382
if (ReadName () != "CDATA")
383
throw Error ("Invalid declaration markup");
388
else if (Peek () == '-') {
392
else if (ReadName () != "DOCTYPE")
393
throw Error ("Invalid declaration markup.");
395
throw Error ("This parser does not support document type.");
397
HandleBufferedContent ();
401
string text = String.Empty;
402
if (Peek () != '?') {
404
text += ReadUntil ('?', false);
410
handler.OnProcessingInstruction (
414
case '/': // end tags
415
HandleBufferedContent ();
416
if (elementNames.Count == 0)
417
throw UnexpectedEndError ();
421
string expected = (string) elementNames.Pop ();
423
if (xmlSpaces.Count > 0)
424
xmlSpace = (string) xmlSpaces.Peek ();
427
if (name != expected)
428
throw Error (String.Format ("End tag mismatch: expected {0} but found {1}", expected, name));
429
handler.OnEndElement (name);
432
default: // start tags (including empty tags)
433
HandleBufferedContent ();
435
while (Peek () != '>' && Peek () != '/')
436
ReadAttribute (attributes);
437
handler.OnStartElement (name, attributes);
440
if (Peek () == '/') {
442
handler.OnEndElement (name);
445
elementNames.Push (name);
446
xmlSpaces.Push (xmlSpace);
456
private void HandleBufferedContent ()
458
if (buffer.Length == 0)
461
handler.OnIgnorableWhitespace (buffer.ToString ());
463
handler.OnChars (buffer.ToString ());
465
isWhitespace = false;
468
private void ReadCharacters ()
470
isWhitespace = false;
483
buffer.Append ((char) Read ());
489
// Handles a reference in content or attribute values. A '#' at the
// current position selects a numeric character reference; otherwise a
// name is read and treated as an entity name. Names that are not
// predefined entities are rejected with an error.
private void ReadReference ()
491
if (Peek () == '#') {
492
// character reference
494
// NOTE(review): ReadCharacterReference () is declared to return int, but
// the result is discarded by this statement. Confirm whether the decoded
// character is supposed to be appended to the buffer here.
ReadCharacterReference ();
496
string name = ReadName ();
506
buffer.Append ('\'');
515
throw Error ("General non-predefined entity reference is not supported in this parser.");
520
// Accumulates the numeric value of a character reference into n, one
// digit at a time. A leading 'x' selects hexadecimal digits (0-9, A-F,
// a-f); otherwise only decimal digits (0-9) are accepted.
// NOTE(review): presumably n is the value this method returns; the
// declaration and return statement are outside the visible lines.
private int ReadCharacterReference ()
523
if (Peek () == 'x') { // hex
525
for (int i = Peek (); i >= 0; i = Peek ()) {
526
if ('0' <= i && i <= '9')
527
// Additive operators bind tighter than '<<', so the shift must be
// parenthesized; otherwise n is shifted by (4 + digit) bits.
n = (n << 4) + i - '0';
528
else if ('A' <= i && i <='F')
529
n = (n << 4) + i - 'A' + 10;
530
else if ('a' <= i && i <='f')
531
n = (n << 4) + i - 'a' + 10;
537
for (int i = Peek (); i >= 0; i = Peek ()) {
538
if ('0' <= i && i <= '9')
539
// Decimal digits: each step scales by 10, not by 16.
n = n * 10 + i - '0';
548
private void ReadAttribute (AttrListImpl a)
550
SkipWhitespaces (true);
551
if (Peek () == '/' || Peek () == '>')
552
// came here just to spend trailing whitespaces
555
string name = ReadName ();
562
value = ReadUntil ('\'', true);
565
value = ReadUntil ('"', true);
568
throw Error ("Invalid attribute value markup.");
570
if (name == "xml:space")
575
private void ReadCDATASection ()
580
throw UnexpectedEndError ();
581
char c = (char) Read ();
584
else if (c == '>' && nBracket > 1) {
585
for (int i = nBracket; i > 2; i--)
590
for (int i = 0; i < nBracket; i++)
598
private void ReadComment ()
608
throw Error ("'--' is not allowed inside comment markup.");
614
internal sealed class SmallXmlParserException : SystemException
619
public SmallXmlParserException (string msg, int line, int column)
620
: base (String.Format ("{0}. At ({1},{2})", msg, line, column))
623
this.column = column;
631
get { return column; }