2
* parser.cpp - parse an XMPP "document"
3
* Copyright (C) 2003 Justin Karneges
5
* This library is free software; you can redistribute it and/or
6
* modify it under the terms of the GNU Lesser General Public
7
* License as published by the Free Software Foundation; either
8
* version 2.1 of the License, or (at your option) any later version.
10
* This library is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
* Lesser General Public License for more details.
15
* You should have received a copy of the GNU Lesser General Public
16
* License along with this library; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24
For XMPP::Parser to be "perfect", some things must be solved/changed in the Qt library:
27
- Fix weird QDomElement::hasAttributeNS() bug (patch submitted to
28
Trolltech on Aug 31st, 2003).
29
- Fix weird behavior in QXmlSimpleReader of reporting endElement() when
30
the '/' character of a self-closing tag is reached, instead of when
31
the final '>' is reached.
32
- Fix incremental parsing bugs in QXmlSimpleReader. At the moment, the
33
only bug I've found is related to attribute parsing, but there might
34
be more (search for '###' in $QTDIR/src/xml/qxml.cpp).
36
We have workarounds for all of the above problems in the code below.
38
- Deal with the <?xml?> processing instruction as an event type, so that we
39
can feed it back to the application properly. Right now it is completely
40
untrackable and is simply tacked into the first event's actualString. We
41
can't easily do this because QXmlSimpleReader eats an extra byte beyond
42
the processing instruction before reporting it.
44
- Make QXmlInputSource capable of accepting data incrementally, to ensure
45
proper text encoding detection and processing over a network. This is
46
technically not a bug, as we have our own subclass below to do it, but
47
it would be nice if Qt had this already.
52
#include <qtextcodec.h>
53
#include <q3ptrlist.h>
58
// File-local flags for the Qt hasAttributeNS() bug workaround.
// qt_bug_check starts out false.
// NOTE(review): presumably it is flipped once the probe has run -- the
// assignment is not visible in this excerpt, confirm.
static bool qt_bug_check = false;
59
// NOTE(review): presumably records the probe's outcome (bug present or not);
// it has no initializer here and the code that sets it is not visible -- confirm.
static bool qt_bug_have;
61
//----------------------------------------------------------------------------
63
//----------------------------------------------------------------------------
64
// QXmlInputSource subclass that receives raw bytes through appendData() and
// hands characters to the XML reader through readNext().
class StreamInput : public QXmlInputSource
86
// start out assuming the codec chosen so far may still be replaced
mightChangeEncoding = true;
98
// NOTE(review): body not visible in this excerpt; presumably returns the text
// collected in last_string by tryExtractPart() -- confirm.
QString lastString() const
103
// Appends the bytes of 'a' to the end of the raw input buffer 'in'.
void appendData(const QByteArray &a)
105
// remember the current length: the new bytes are copied in right after it
int oldsize = in.size();
106
// grow the buffer by exactly the size of the incoming chunk
in.resize(oldsize + a.size());
107
memcpy(in.data() + oldsize, a.data(), a.size());
124
// Returns the next character for the XML reader.
// NOTE: setting 'peek' to true allows the same char to be read again,
125
// however this still advances the internal byte processing.
126
QChar readNext(bool peek=false)
129
// while the encoding is still undecided, a separate path is taken
// (its body is not visible in this excerpt)
if(mightChangeEncoding)
134
// try to decode more input; tryExtractPart() reports failure when it could
// not produce any character
if(!tryExtractPart(&s))
147
// debug trace for the "no data available" (EOD) case
#ifdef XMPP_PARSER_DEBUG
148
printf("next() = EOD\n");
152
// debug trace of the character being returned
#ifdef XMPP_PARSER_DEBUG
153
printf("next() = [%c]\n", c.latin1());
161
// Builds a copy of the raw bytes in 'in' from offset 'at' up to the end,
// i.e. the bytes after the current read position.
// (The return statement is outside this excerpt.)
QByteArray unprocessed() const
163
// sized to hold exactly the bytes between 'at' and the end of 'in'
QByteArray a(in.size() - at);
164
memcpy(a.data(), in.data() + at, a.size());
178
// NOTE(review): body not visible in this excerpt; presumably returns
// v_encoding, the codec name stored when a codec is selected -- confirm.
QString encoding() const
189
// true while the codec may still be switched after inspecting the start of
// the stream; cleared once that inspection is finished
bool mightChangeEncoding;
197
// Encoding detection for the buffered input. The visible code works in two
// stages: first pick UTF-16 or UTF-8 from the leading bytes, then, while
// mightChangeEncoding is set, look for an XML header that names another codec.
// NOTE(review): the enclosing function header and several control-flow lines
// are not part of this excerpt.
#ifdef XMPP_PARSER_DEBUG
198
printf("processing. size=%d, at=%d\n", in.size(), at);
201
QTextCodec *codec = 0;
202
// p points at the first not-yet-consumed byte, size is how many remain
uchar *p = (uchar *)in.data() + at;
203
int size = in.size() - at;
205
// do we have enough information to determine the encoding?
209
if(p[0] == 0xfe || p[0] == 0xff) {
210
// probably going to be a UTF-16 byte order mark
213
// accept either byte order of the mark: FE FF or FF FE
if((p[0] == 0xfe && p[1] == 0xff) || (p[0] == 0xff && p[1] == 0xfe)) {
219
codec = QTextCodec::codecForMib(1000); // UTF-16
221
codec = QTextCodec::codecForMib(106); // UTF-8
223
// remember the chosen codec's name and create a decoder for it
v_encoding = codec->name();
224
dec = codec->makeDecoder();
226
// for utf16, put in the byte order mark
228
out += dec->toUnicode((const char *)p, 2);
233
if(mightChangeEncoding) {
235
// locate the start of the first tag in the text decoded so far
int n = out.find('<');
237
// we need a closing bracket
238
int n2 = out.find('>', n);
241
// cut the tag out of 'out' and let processXmlHeader() extract the encoding name
QString h = out.mid(n, n2-n);
242
QString enc = processXmlHeader(h);
243
QTextCodec *codec = 0;
245
codec = QTextCodec::codecForName(enc.latin1());
249
// switch to the codec named by the header
v_encoding = codec->name();
251
dec = codec->makeDecoder();
253
// the header has been examined: the encoding is final from here on
mightChangeEncoding = false;
261
// decode more input; stop when nothing further can be decoded right now
if(!tryExtractPart(&s))
263
// checkForBadChars() looks at the text in front of the first '<'
if(checkBad && checkForBadChars(s)) {
265
mightChangeEncoding = false;
276
// Extracts the value of the encoding attribute from an "<?xml ...?>" header.
// 'h' is the header text, starting at its '<'.
// The encoding name is built from the characters between the quotes that
// follow the word "encoding"; the return statements are outside this excerpt.
QString processXmlHeader(const QString &h)
278
// anything that does not begin with "<?xml" is not an XML declaration
if(h.left(5) != "<?xml")
281
int endPos = h.find(">");
282
int startPos = h.find("encoding");
283
// only proceed if "encoding" was found and lies before the closing '>'
if(startPos < endPos && startPos != -1) {
287
// ran past the end of the header while searching for the opening quote
if(startPos > endPos) {
290
// advance until the opening quote (single or double) is reached
} while(h[startPos] != '"' && h[startPos] != '\'');
292
// collect characters until the closing quote (single or double)
while(h[startPos] != '"' && h[startPos] != '\'') {
293
encoding += h[startPos];
295
// ran past the end of the header before the closing quote
if(startPos > endPos) {
305
// Feeds buffered bytes to the decoder one at a time until it yields text.
// The decoded text is appended to last_string.
// NOTE(review): the lines that store the result through 's' and the return
// statements are not visible in this excerpt.
bool tryExtractPart(QString *s)
307
// number of bytes not yet consumed
int size = in.size() - at;
310
uchar *p = (uchar *)in.data() + at;
313
// a single byte is passed per call; the decoder returns an empty string
// until it has a complete character
nextChars = dec->toUnicode((const char *)p, 1);
316
if(!nextChars.isEmpty())
318
// every buffered byte has been consumed
if(at == (int)in.size())
321
// keep a running record of the text that has been decoded
last_string += nextChars;
324
// free processed data?
327
// shift the unconsumed tail of the buffer down to the front
// NOTE(review): for this shift to be correct 'p' must be the start of the
// buffer here; its declaration in this scope is outside the excerpt -- confirm.
int size = in.size() - at;
328
memmove(p, p + at, size);
336
// Inspects the characters of 's' that come before its first '<' and tests
// each one for being non-whitespace.
// NOTE(review): the return values and the handling of a missing '<' are not
// visible in this excerpt.
bool checkForBadChars(const QString &s)
338
// index of the first '<' (QString::find yields -1 when there is none)
int len = s.find('<');
343
for(int n = 0; n < len; ++n) {
344
if(!s.at(n).isSpace())
352
//----------------------------------------------------------------------------
354
//----------------------------------------------------------------------------
357
// SAX content handler (QXmlDefaultHandler subclass). Its callbacks turn
// reader notifications into Parser::Event objects queued on eventList and
// build DOM nodes in the QDomDocument passed to the constructor.
class ParserHandler : public QXmlDefaultHandler
360
// _in: the StreamInput being parsed; _doc: document used to create DOM nodes
ParserHandler(StreamInput *_in, QDomDocument *_doc)
369
// the list owns its events: removing an item from it also deletes the item
eventList.setAutoDelete(true);
384
// NOTE(review): body not visible; presumably records the prefix/uri pair in
// nsnames/nsvalues -- confirm.
bool startPrefixMapping(const QString &prefix, const QString &uri)
393
// Content-handler callback for an opening tag.
// Two paths are visible: one creates a DocumentOpen event, the other creates
// a QDomElement and appends it to 'current'. The condition that selects
// between them is outside this excerpt.
bool startElement(const QString &namespaceURI, const QString &localName, const QString &qName, const QXmlAttributes &atts)
396
Parser::Event *e = new Parser::Event;
398
// copy the attributes into 'a', skipping any (uri, local name) pair that
// 'a' already contains
for(int n = 0; n < atts.length(); ++n) {
399
QString uri = atts.uri(n);
400
QString ln = atts.localName(n);
401
if(a.index(uri, ln) == -1)
402
a.append(atts.qName(n), uri, ln, atts.value(n));
404
e->setDocumentOpen(namespaceURI, localName, qName, a, nsnames, nsvalues);
407
// attach the text reported by StreamInput::lastString() to the event
e->setActualString(in->lastString());
414
QDomElement e = doc->createElementNS(namespaceURI, qName);
415
for(int n = 0; n < atts.length(); ++n) {
416
QString uri = atts.uri(n);
417
QString ln = atts.localName(n);
420
// two alternative ways of asking whether the attribute is already present;
// which one runs is decided by lines not shown here
have = e.hasAttributeNS(uri, ln);
425
have = e.hasAttribute(ln);
427
e.setAttributeNS(uri, atts.qName(n), atts.value(n));
435
// make the new element a child of the element currently being filled
current.appendChild(e);
443
// Content-handler callback for a closing tag.
// The visible code either emits a DocumentClose event, or finishes an
// element and moves 'current' back up the DOM tree. The conditions selecting
// each path are outside this excerpt.
bool endElement(const QString &namespaceURI, const QString &localName, const QString &qName)
447
Parser::Event *e = new Parser::Event;
448
e->setDocumentClose(namespaceURI, localName, qName);
449
e->setActualString(in->lastString());
455
// done with a depth 1 element?
457
Parser::Event *e = new Parser::Event;
459
e->setActualString(in->lastString());
464
// clear both element handles by assigning null QDomElements
elem = QDomElement();
465
current = QDomElement();
468
// otherwise step back up to the parent element
current = current.parentNode().toElement();
471
// the last character read was the '/' of a self-closing tag; see the
// workaround comment near checkNeedMore
if(in->lastRead() == '/')
477
// Content-handler callback for character data: when an element is currently
// open, the text is added to it as a DOM text node.
// (Return statements are outside this excerpt.)
bool characters(const QString &str)
480
QString content = str;
481
// empty text gets separate handling (not visible here)
if(content.isEmpty())
484
// only attach text while there is a current element to hold it
if(!current.isNull()) {
485
QDomText text = doc->createTextNode(content);
486
current.appendChild(text);
492
/*bool processingInstruction(const QString &target, const QString &data)
494
printf("Processing: [%s], [%s]\n", target.latin1(), data.latin1());
501
// Here we will work around QXmlSimpleReader strangeness and self-closing tags.
502
// The problem is that endElement() is called when the '/' is read, not when
503
// the final '>' is read. This is a potential problem when obtaining unprocessed
504
// bytes from StreamInput after this event, as the '>' character will end up
505
// in the unprocessed chunk. To work around this, we need to advance StreamInput's
506
// internal byte processing, but not the xml character data. This way, the '>'
507
// will get processed and will no longer be in the unprocessed return, but
508
// QXmlSimpleReader can still read it. To do this, we call StreamInput::readNext with 'peek' set to true.
510
// Peek at the next character: this advances StreamInput's byte processing
// while leaving the character available to be read again.
QChar c = in->readNext(true); // peek
511
// nothing could be read yet
// NOTE(review): the body of this branch is not visible in this excerpt.
if(c == QXmlInputSource::EndOfData) {
515
// We'll assume the next char is a '>'. If it isn't, then
516
// QXmlSimpleReader will deal with that problem on the next
517
// parse. We don't need to take any action here.
520
// there should have been a pending event
521
Parser::Event *e = eventList.getFirst();
523
// append the '>' to the text recorded for the pending event
e->setActualString(e->actualString() + '>');
529
// Takes the first queued event off eventList.
// NOTE(review): what is returned in the empty and non-empty cases is not
// visible in this excerpt.
Parser::Event *takeEvent()
533
if(eventList.isEmpty())
536
Parser::Event *e = eventList.getFirst();
537
// NOTE(review): the constructor puts eventList in auto-delete mode, and
// removeRef() on an auto-deleting Q3PtrList destroys the item; confirm that
// the hidden lines switch auto-delete off around this call.
eventList.removeRef(e);
545
// namespace lists handed to Parser::Event::setDocumentOpen() by startElement()
QStringList nsnames, nsvalues;
546
// 'current' is the element that new child elements and text nodes are
// appended to.
// NOTE(review): 'elem' presumably is the top-level element under
// construction -- its assignment is not visible, confirm.
QDomElement elem, current;
547
// queue of events, consumed from the front by takeEvent()
Q3PtrList<Parser::Event> eventList;
553
//----------------------------------------------------------------------------
555
//----------------------------------------------------------------------------
556
// Private data of a Parser::Event; copy-constructed by Event::operator=.
class Parser::Event::Private
564
// namespace lists stored by setDocumentOpen() and iterated by nsprefix()
QStringList nsnames, nsvalues;
567
Parser::Event::Event()
572
Parser::Event::Event(const Event &from)
578
// Assignment: gives this event its own copy of the source event's private data.
// NOTE(review): release of the previous 'd' and handling of a null 'from.d'
// are not visible in this excerpt -- confirm.
Parser::Event & Parser::Event::operator=(const Event &from)
583
// copy-construct a separate Private from the source's Private
d = new Private(*from.d);
587
Parser::Event::~Event()
592
// Returns true when the event has no private data, i.e. 'd' is a null pointer.
bool Parser::Event::isNull() const
594
return (d ? false: true);
597
int Parser::Event::type() const
604
// Searches the event's namespace lists for 's'.
// NOTE(review): the comparison, the advancing of 'it2' and the value returned
// on a match are not visible in this excerpt.
QString Parser::Event::nsprefix(const QString &s) const
606
// one iterator per list, both starting at the beginning
QStringList::ConstIterator it = d->nsnames.begin();
607
QStringList::ConstIterator it2 = d->nsvalues.begin();
608
for(; it != d->nsnames.end(); ++it) {
613
// a null string is returned here
return QString::null;
616
QString Parser::Event::namespaceURI() const
621
QString Parser::Event::localName() const
626
QString Parser::Event::qName() const
631
QXmlAttributes Parser::Event::atts() const
636
QString Parser::Event::actualString() const
641
QDomElement Parser::Event::element() const
646
// Turns this event into a DocumentOpen event and stores the namespace URI
// and namespace lists.
// NOTE(review): storage of localName, qName and atts is not visible here.
void Parser::Event::setDocumentOpen(const QString &namespaceURI, const QString &localName, const QString &qName, const QXmlAttributes &atts, const QStringList &nsnames, const QStringList &nsvalues)
650
d->type = DocumentOpen;
651
d->ns = namespaceURI;
655
d->nsnames = nsnames;
656
d->nsvalues = nsvalues;
659
// Turns this event into a DocumentClose event and stores the namespace URI.
// NOTE(review): storage of localName and qName is not visible here.
void Parser::Event::setDocumentClose(const QString &namespaceURI, const QString &localName, const QString &qName)
663
d->type = DocumentClose;
664
d->ns = namespaceURI;
669
void Parser::Event::setElement(const QDomElement &elem)
677
void Parser::Event::setError()
684
void Parser::Event::setActualString(const QString &str)
689
//----------------------------------------------------------------------------
691
//----------------------------------------------------------------------------
692
// Private state of Parser: the DOM document, the input source, the content
// handler and the XML reader.
class Parser::Private
709
// Builds a new document/input/handler/reader set.
// NOTE(review): teardown of the previous objects and the effect of
// create == false are not visible in this excerpt.
void reset(bool create=true)
717
doc = new QDomDocument;
718
in = new StreamInput;
719
// the handler reads from 'in' and creates its DOM nodes through 'doc'
handler = new ParserHandler(in, doc);
720
reader = new QXmlSimpleReader;
721
reader->setContentHandler(handler);
723
// initialize the reader
725
// second argument true requests incremental parsing, which is later
// resumed with parseContinue()
reader->parse(in, true);
732
ParserHandler *handler;
733
QXmlSimpleReader *reader;
740
// check for evil bug in Qt <= 3.2.1
743
// probe: create an element and set no attributes on it
QDomElement e = d->doc->createElementNS("someuri", "somename");
744
// NOTE(review): presumably a true answer here means the bug is present, since
// no attribute was ever set on 'e' -- the branch body is not visible, confirm.
if(e.hasAttributeNS("someuri", "somename"))
761
// Passes newly received bytes to the input source and, if the handler is
// waiting for more input, lets it re-check.
void Parser::appendData(const QByteArray &a)
763
d->in->appendData(a);
765
// if handler was waiting for more, give it a kick
766
if(d->handler->needMore)
767
d->handler->checkNeedMore();
770
// Produces the next parser event.
// The visible code first asks the handler for a queued event, then resumes
// the incremental parse and asks again.
// NOTE(review): the connecting control flow and the return statements are
// not visible in this excerpt.
Parser::Event Parser::readNext()
773
// the handler is still waiting for input
if(d->handler->needMore)
775
Event *ep = d->handler->takeEvent();
777
// resume incremental parsing; a false result is handled by lines not
// visible here
if(!d->reader->parseContinue()) {
781
// look again for an event after parsing further
ep = d->handler->takeEvent();
790
// Returns the input source's unprocessed bytes (delegates to StreamInput::unprocessed()).
QByteArray Parser::unprocessed() const
792
return d->in->unprocessed();
795
// Returns the encoding reported by the input source (delegates to StreamInput::encoding()).
QString Parser::encoding() const
797
return d->in->encoding();