2
* Implementation of the Pedro mini-DOM parser and tree
7
* Copyright (C) 2005 Bob Jamison
9
* This library is free software; you can redistribute it and/or
10
* modify it under the terms of the GNU Lesser General Public
11
* License as published by the Free Software Foundation; either
12
* version 2.1 of the License, or (at your option) any later version.
14
* This library is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
* Lesser General Public License for more details.
19
* You should have received a copy of the GNU Lesser General Public
20
* License along with this library; if not, write to the Free Software
21
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
32
#include <sys/types.h>
43
//########################################################################
45
//########################################################################
47
/**
 * Produce a deep copy of this element.
 *
 * The copy is constructed from this element's name and value, is given the
 * same parent pointer, and receives copies of the attribute and namespace
 * lists. Each child is cloned recursively and attached with addChild().
 *
 * @return a newly allocated Element. Nothing in this function releases it,
 *         so the caller is responsible for the returned object.
 */
Element *Element::clone()
{
    Element *copy = new Element(name, value);
    copy->parent     = parent;
    copy->attributes = attributes;
    copy->namespaces = namespaces;

    for (std::vector<Element *>::size_type idx = 0; idx < children.size(); ++idx)
        copy->addChild(children[idx]->clone());

    return copy;
}
63
/**
 * Depth-first helper for findElements().
 *
 * Appends this element to `res` when its name equals `name`, then repeats
 * the search in every child, in child order.
 *
 * @param res  output vector that collects the matching elements
 * @param name tag name to compare against getName()
 */
void Element::findElementsRecursive(std::vector<Element *>&res, const DOMString &name)
{
    if (getName() == name)
        res.push_back(this);

    // Recurse unconditionally: a non-matching element can still have
    // matching descendants.
    for (unsigned int i = 0; i < children.size(); i++)
        children[i]->findElementsRecursive(res, name);
}
73
std::vector<Element *> Element::findElements(const DOMString &name)
75
std::vector<Element *> res;
76
findElementsRecursive(res, name);
80
/**
 * Look up an attribute of this element by name.
 *
 * @param name attribute name to search for
 * @return the value of the first attribute whose name equals `name`, or an
 *         empty DOMString when this element has no such attribute
 */
DOMString Element::getAttribute(const DOMString &name)
{
    for (unsigned int i = 0; i < attributes.size(); ++i)
    {
        if (attributes[i].getName() == name)
            return attributes[i].getValue();
    }

    // Not found: return an empty string instead of falling off the end of
    // a value-returning function.
    return DOMString();
}
88
/**
 * Find the first element named `tagName` in this subtree and return one of
 * its attributes.
 *
 * @param tagName  element name passed to findElements()
 * @param attrName attribute name passed to getAttribute() on the first match
 * @return the attribute value, or an empty DOMString when no element
 *         matches `tagName`
 */
DOMString Element::getTagAttribute(const DOMString &tagName, const DOMString &attrName)
{
    std::vector<Element *> elems = findElements(tagName);

    // elems[0] is only valid when at least one element matched.
    if (elems.empty())
        return DOMString();

    return elems[0]->getAttribute(attrName);
}
97
/**
 * Find the first element named `tagName` in this subtree and return its
 * value.
 *
 * @param tagName element name passed to findElements()
 * @return getValue() of the first match, or an empty DOMString when no
 *         element matches `tagName`
 */
DOMString Element::getTagValue(const DOMString &tagName)
{
    std::vector<Element *> elems = findElements(tagName);

    // elems[0] is only valid when at least one element matched.
    if (elems.empty())
        return DOMString();

    return elems[0]->getValue();
}
106
/**
 * Append `child` to this element's child list and make this element its
 * parent.
 *
 * @param child element to attach; a null pointer is ignored
 */
void Element::addChild(Element *child)
{
    // Guard first: the next statement dereferences child.
    if (!child)
        return;

    child->parent = this;
    children.push_back(child);
}
115
void Element::addAttribute(const DOMString &name, const DOMString &value)
117
Attribute attr(name, value);
118
attributes.push_back(attr);
121
void Element::addNamespace(const DOMString &prefix, const DOMString &namespaceURI)
123
Namespace ns(prefix, namespaceURI);
124
namespaces.push_back(ns);
127
void Element::writeIndentedRecursive(FILE *f, int indent)
132
//Opening tag, and attributes
133
for (i=0;i<indent;i++)
135
fprintf(f,"<%s",name.c_str());
136
for (unsigned int i=0 ; i<attributes.size() ; i++)
138
fprintf(f," %s=\"%s\"",
139
attributes[i].getName().c_str(),
140
attributes[i].getValue().c_str());
142
for (unsigned int i=0 ; i<namespaces.size() ; i++)
144
fprintf(f," xmlns:%s=\"%s\"",
145
namespaces[i].getPrefix().c_str(),
146
namespaces[i].getNamespaceURI().c_str());
151
if (value.size() > 0)
153
for (int i=0;i<indent;i++)
155
fprintf(f," %s\n", value.c_str());
158
for (unsigned int i=0 ; i<children.size() ; i++)
159
children[i]->writeIndentedRecursive(f, indent+2);
162
for (int i=0; i<indent; i++)
164
fprintf(f,"</%s>\n", name.c_str());
167
void Element::writeIndented(FILE *f)
169
writeIndentedRecursive(f, 0);
172
void Element::print()
174
writeIndented(stdout);
178
//########################################################################
180
//########################################################################
190
// Table of entity escapes. getQuoted() and parseElement() walk it until
// they reach an entry whose `value` is zero, try match() on each entry's
// `escaped` text, and append the entry's `value` when it matches.
// NOTE(review): the initializer is not visible in this listing.
static EntityEntry entities[] =
202
void Parser::getLineAndColumn(long pos, long *lineNr, long *colNr)
206
for (long i=0 ; i<pos ; i++)
208
XMLCh ch = parsebuf[i];
209
if (ch == '\n' || ch == '\r')
223
void Parser::error(char *fmt, ...)
227
getLineAndColumn(currentPosition, &lineNr, &colNr);
229
fprintf(stderr, "xml error at line %ld, column %ld:", lineNr, colNr);
231
vfprintf(stderr,fmt,args);
233
fprintf(stderr, "\n");
238
int Parser::peek(long pos)
242
currentPosition = pos;
243
int ch = parsebuf[pos];
244
//printf("ch:%c\n", ch);
250
/**
 * Escape the five XML predefined entities in `str`.
 *
 *   &  ->  &amp;      <  ->  &lt;      >  ->  &gt;
 *   '  ->  &apos;     "  ->  &quot;
 *
 * Every other character is copied unchanged.
 *
 * @param str text to escape
 * @return the escaped copy
 */
DOMString Parser::encode(const DOMString &str)
{
    DOMString ret;
    for (unsigned int i = 0; i < str.size(); i++)
    {
        XMLCh ch = (XMLCh)str[i];
        if (ch == '&')
            ret.append("&amp;");
        else if (ch == '<')
            ret.append("&lt;");
        else if (ch == '>')
            ret.append("&gt;");
        else if (ch == '\'')
            ret.append("&apos;");
        else if (ch == '"')
            ret.append("&quot;");
        else
            ret.push_back(str[i]);
    }
    return ret;
}
274
/**
 * Compare the input starting at offset p0 against the literal `text`,
 * reading input characters through peek().
 * NOTE(review): only the signature and the mismatch test are visible in
 * this listing; callers store the result as a position (p2). The exact
 * return values on match and mismatch must be confirmed against the full
 * body.
 */
int Parser::match(long p0, const char *text)
279
// an input character that differs from the current literal character
if (peek(p) != *text)
288
/**
 * Advance past ignorable input starting at offset p.
 * The visible lines test for the comment delimiters "<!--" and "-->", so
 * comments are involved in what gets skipped.
 * NOTE(review): the loop structure and the whitespace test itself are not
 * visible in this listing — confirm against the full body.
 */
int Parser::skipwhite(long p)
293
// is a comment opening at p?
int p2 = match(p, "<!--");
299
// is the comment terminator at p?
p2 = match(p, "-->");
316
/* modify this to allow all chars for an element or attribute name*/
317
/**
 * Read a word (used by parseElement() for tag and attribute names)
 * starting at offset p0 into `buf`.
 * NOTE(review): only the delimiter test is visible in this listing; the
 * loop and the returned position must be confirmed against the full body.
 */
int Parser::getWord(int p0, DOMString &buf)
323
// delimiters: any character at or below space, or one of / > =
if (b<=' ' || b=='/' || b=='>' || b=='=')
331
/**
 * Read a quoted string starting at offset p0 into `buf`.
 * When `do_i_parse` is non-zero, '&' starts an entity reference that is
 * looked up in the `entities` table and replaced by the entry's value.
 * NOTE(review): several lines of the body are not visible in this listing.
 */
int Parser::getQuoted(int p0, DOMString &buf, int do_i_parse)
335
// the text must open with a double or a single quote
if (peek(p) != '"' && peek(p) != '\'')
342
// NOTE(review): either quote character is tested here regardless of which
// one opened the string, so a value such as "it's" would presumably stop
// at the apostrophe — confirm.
if (b=='"' || b=='\'')
344
if (b=='&' && do_i_parse)
347
// walk the entity table up to the entry whose value is zero
for (EntityEntry *ee = entities ; ee->value ; ee++)
349
int p2 = match(p, ee->escaped);
352
buf.push_back(ee->value);
360
error("unterminated entity");
373
/**
 * Handle the leading XML declaration ("<?xml ... ?>"), starting at offset
 * p0. parse() feeds the returned position to parseDoctype().
 * NOTE(review): most of the body is not visible in this listing.
 */
int Parser::parseVersion(int p0)
375
//printf("### parseVersion: %d\n", p0);
385
// input exhausted, or the expected '?' is not at p
if (p>=parselen || peek(p)!='?')
408
//printf("Got version:%s\n",buf.c_str());
412
/**
 * Handle a "<!...>" document type declaration, starting at offset p0.
 * parse() feeds the returned position to parseElement().
 * NOTE(review): most of the body is not visible in this listing.
 */
int Parser::parseDoctype(int p0)
414
//printf("### parseDoctype: %d\n", p0);
419
// input exhausted, or no '<' at p
if (p>=parselen || peek(p)!='<')
424
// not a "<!" construct, or it is "<!-" (the start of a comment)
if (peek(p)!='!' || peek(p+1)=='-')
441
//printf("Got doctype:%s\n",buf.c_str());
445
/**
 * Parse one element starting at offset p0: the start tag with its
 * attributes and namespace declarations, the content (text, comments,
 * CDATA sections, entity references and nested elements, which recurse
 * with depth+1), and the matching end tag.
 *
 * NOTE(review): a large part of the body is not visible in this listing;
 * how `par` is used and what is returned on success and on failure must be
 * confirmed against the full body.
 */
int Parser::parseElement(int p0, Element *par,int depth)
461
DOMString openTagName;
463
p = getWord(p, openTagName);
464
//printf("####tag :%s\n", openTagName.c_str());
467
//Add element to tree
468
Element *n = new Element(openTagName);
479
//printf("ch:%c\n",ch);
482
// NOTE(review): p<parselen+1 still holds when p == parselen; if the intent
// is "one more character is available" this bound looks off — confirm.
else if (ch=='/' && p<parselen+1)
490
//printf("quick close\n");
495
p2 = getWord(p, attrName);
498
//printf("name:%s",buf);
502
//printf("ch:%c\n",ch);
508
// printf("ch:%c\n",ch);
510
// attribute values are read with entity parsing enabled
p2 = getQuoted(p, attrVal, true);
512
//printf("name:'%s' value:'%s'\n",attrName.c_str(),attrVal.c_str());
513
char *namestr = (char *)attrName.c_str();
514
// names starting with "xmlns:" are stored as namespaces, everything else
// as ordinary attributes
if (strncmp(namestr, "xmlns:", 6)==0)
515
// NOTE(review): attrName still carries its "xmlns:" prefix here, while
// Element::writeIndentedRecursive() prints namespaces as " xmlns:%s" —
// written output would presumably repeat the prefix; confirm.
n->addNamespace(attrName, attrVal);
517
n->addAttribute(attrName, attrVal);
524
// ### Get intervening data ###
529
p2 = match(p, "<!--");
535
p2 = match(p, "-->");
547
// "</" outside CDATA is the start of the end tag
if (ch=='<' && !cdata && peek(p+1)=='/')
552
p2 = match(p, "<![CDATA[");
563
// nested element: recurse with this element as parent
p2 = parseElement(p, n, depth+1);
567
// NOTE(review): this diagnostic goes to stdout via printf rather than
// through error().
printf("problem on element:%s. p2:%d p:%d\n",
568
openTagName.c_str(), p2, p);
576
// entity reference in character data (not inside CDATA)
if (ch=='&' && !cdata)
579
for (EntityEntry *ee = entities ; ee->value ; ee++)
581
int p2 = match(p, ee->escaped);
584
data.push_back(ee->value);
592
error("unterminated entity");
598
//# NONE OF THE ABOVE
605
//printf("%d : data:%s\n",p,data.c_str());
612
// NOTE(review): error() already appends a newline after the message.
error("no < for end tag\n");
619
error("no / on end tag");
625
DOMString closeTagName;
626
p = getWord(p, closeTagName);
627
if (openTagName != closeTagName)
629
// NOTE(review): %S is a wide-string conversion in printf-family functions,
// but the arguments are c_str() results, which are passed to %s elsewhere
// in this function — presumably this should be %s; confirm.
error("Mismatched closing tag. Expected </%S>. Got '%S'.",
630
openTagName.c_str(), closeTagName.c_str());
636
error("no > on end tag for '%s'", closeTagName.c_str());
640
// printf("close element:%s\n",closeTagName.c_str());
648
/**
 * Parse a document held in a buffer of XMLCh characters.
 *
 * Records the buffer and its length in parsebuf and parselen, which the
 * other parser methods read, creates a synthetic element named "root",
 * and then runs parseVersion(), parseDoctype() and parseElement() in that
 * order, each starting where the previous one stopped. The document
 * element is parsed with the synthetic root as its parent argument.
 *
 * @param buf input characters; not copied, so it must stay valid for the
 *            duration of the call
 * @param pos offset at which parsing starts
 * @param len number of characters in `buf`
 * @return the newly allocated "root" element; the caller is responsible
 *         for it
 */
Element *Parser::parse(XMLCh *buf,int pos,int len)
{
    parsebuf = buf;
    parselen = len;

    Element *rootNode = new Element("root");
    pos = parseVersion(pos);
    pos = parseDoctype(pos);
    parseElement(pos, rootNode, 0);
    return rootNode;
}
660
/**
 * Parse a document held in a plain char buffer.
 *
 * The bytes are widened into a temporary XMLCh buffer, which is handed to
 * the XMLCh overload of parse() and released afterwards.
 *
 * @param buf input bytes
 * @param pos offset at which parsing starts
 * @param len number of bytes in `buf`
 * @return the parsed tree, or NULL when `buf` is null, `len` is negative,
 *         or the temporary buffer cannot be allocated
 */
Element *Parser::parse(const char *buf, int pos, int len)
{
    if (!buf || len < 0)
        return NULL;

    XMLCh *charbuf = (XMLCh *)malloc((len+1) * sizeof(XMLCh));
    if (!charbuf)
        return NULL;

    // Go through unsigned char so that bytes >= 0x80 are not sign-extended
    // when XMLCh is wider than char.
    for (int i = 0; i < len; i++)
        charbuf[i] = (XMLCh)(unsigned char)buf[i];
    charbuf[len] = '\0';

    // Honour the caller's start offset rather than always starting at 0.
    Element *n = parse(charbuf, pos, len);
    free(charbuf);
    return n;
}
676
/**
 * Parse a document held in a DOMString.
 *
 * The characters are copied into a temporary XMLCh buffer, which is handed
 * to the XMLCh overload of parse() and released afterwards.
 *
 * @param buf the document text
 * @return the parsed tree, or NULL when the temporary buffer cannot be
 *         allocated
 */
Element *Parser::parse(const DOMString &buf)
{
    long len = buf.size();
    XMLCh *charbuf = (XMLCh *)malloc((len+1) * sizeof(XMLCh));
    if (!charbuf)
        return NULL;

    for (long i = 0; i < len; i++)
        charbuf[i] = (XMLCh)buf[i];
    charbuf[len] = '\0';

    Element *n = parse(charbuf, 0, len);
    free(charbuf);
    return n;
}
692
/**
 * Read a whole file and parse it.
 *
 * The file is opened in binary mode, its size is taken from fstat(), and
 * its bytes are read one at a time into a temporary XMLCh buffer that is
 * passed to parse() and released afterwards.
 *
 * @param fileName path of the file to read
 * @return the parsed tree, or NULL when `fileName` is null, the file
 *         cannot be opened or stat'ed, or the buffer cannot be allocated
 */
Element *Parser::parseFile(const char *fileName)
{
    //##### LOAD INTO A CHAR BUF, THEN CONVERT TO XMLCh
    if (!fileName)
        return NULL;

    FILE *f = fopen(fileName, "rb");
    if (!f)
        return NULL;

    struct stat statBuf;
    if (fstat(fileno(f),&statBuf)<0)
    {
        fclose(f);
        return NULL;
    }
    long filelen = statBuf.st_size;

    XMLCh *charbuf = (XMLCh *)malloc((filelen+1) * sizeof(XMLCh));
    if (!charbuf)
    {
        fclose(f);
        return NULL;
    }

    // Stop on EOF *before* storing, and never write past the allocation
    // even if the file grew after fstat().
    long nrRead = 0;
    while (nrRead < filelen)
    {
        int ch = fgetc(f);
        if (ch == EOF)
            break;
        charbuf[nrRead++] = (XMLCh)ch;
    }
    fclose(f);
    charbuf[nrRead] = '\0';

    // Parse exactly what was read, which may be less than st_size.
    Element *n = parse(charbuf, 0, nrRead);
    free(charbuf);
    return n;
}
739
//########################################################################
741
//########################################################################
743
bool doTest(char *fileName)
745
Pedro::Parser parser;
747
Pedro::Element *elem = parser.parseFile(fileName);
751
printf("Parsing failed\n");
764
int main(int argc, char **argv)
768
printf("usage: %s <xmlfile>\n", argv[0]);
772
if (!doTest(argv[1]))
780
//########################################################################
781
//# E N D O F F I L E
782
//########################################################################