11
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
12
GNU General Public License for more details.
13
13
***********************************************************************/
14
#include <openbabel/babelconfig.h>
15
#include <openbabel/xml.h>
17
17
using namespace std;
18
18
namespace OpenBabel
22
XMLBaseFormat* XMLConversion::_pDefault=NULL;
24
XMLConversion::XMLConversion(OBConversion* pConv)
25
: OBConversion(*pConv), _reader(NULL), _writer(NULL),
26
_LookingForNamespace(false),_SkipNextRead(false),
27
_lastpos(0), _requestedpos(0)
30
pConv->SetAuxConv(this);//marks original OBConversion object as having been extended
31
SetAuxConv(this);//marks this new object as extended (for use with OBConversion pointer)
34
bool XMLConversion::SetupReader()
37
return true; //do not need to make a new reader
39
//If the inputstream is not at the start (probably arising in fastsearch),
40
//save its position and rewind so that the reader initialization is ok.
41
//(Getting the requested object is handled in ReadXML(), when the format is known.)
42
_requestedpos = GetInStream()->tellg();
44
GetInStream()->seekg(0);
46
//Set up a parser from an input stream
47
_reader = xmlReaderForIO(
48
ReadStream, //xmlInputReadCallback (static member function)
49
NULL,//xmlInputCloseCallback (static member function)
57
cerr << "Cannot set up libxml2 reader" << endl;
60
//A new reader immediately reads 4 bytes (presumably to determine the encoding).
62
_lastpos = GetInStream()->tellg();
66
bool XMLConversion::SetupWriter()
68
//Set up XML writer if one does not already exist
22
XMLBaseFormat* XMLConversion::_pDefault=NULL;
24
XMLConversion::XMLConversion(OBConversion* pConv)
25
: OBConversion(*pConv),
26
_requestedpos(0), _lastpos(0),
27
_reader(NULL), _writer(NULL),
28
_LookingForNamespace(false), _SkipNextRead(false)
31
pConv->SetAuxConv(this);//marks original OBConversion object as having been extended
32
SetAuxConv(this);//marks this new object as extended (for use with OBConversion pointer)
35
bool XMLConversion::SetupReader()
38
return true; //do not need to make a new reader
40
//If the inputstream is not at the start (probably arising in fastsearch),
41
//save its position and rewind so that the reader initialization is ok.
42
//(Getting the requested object is handled in ReadXML(), when the format is known.)
43
_requestedpos = GetInStream()->tellg();
45
GetInStream()->seekg(0);
47
//Set up a parser from an input stream
48
_reader = xmlReaderForIO(
49
ReadStream, //xmlInputReadCallback (static member function)
50
NULL,//xmlInputCloseCallback (static member function)
58
cerr << "Cannot set up libxml2 reader" << endl;
61
//A new reader immediately reads 4 bytes (presumably to determine the encoding).
63
_lastpos = GetInStream()->tellg();
67
bool XMLConversion::SetupWriter()
69
//Set up XML writer if one does not already exist
72
_buf = xmlOutputBufferCreateIO (
73
WriteStream, //xmlOutputWriteCallback
74
NULL, //xmlOutputCloseCallback
76
NULL); //xmlCharEncodingHandlerPtr
77
_writer = xmlNewTextWriter(_buf);
80
_buf = xmlBufferCreate();
81
_writer = xmlNewTextWriterMemory(_buf, 0);
73
_buf = xmlOutputBufferCreateIO (
74
WriteStream, //xmlOutputWriteCallback
75
NULL, //xmlOutputCloseCallback
77
NULL); //xmlCharEncodingHandlerPtr
78
_writer = xmlNewTextWriter(_buf);
82
cerr << "Error setting up xml writer\n" << endl;
88
ret = xmlTextWriterSetIndent(_writer,0);
91
ret = xmlTextWriterSetIndent(_writer,1);
92
ret = xmlTextWriterSetIndentString(_writer, BAD_CAST " ");
97
XMLConversion::~XMLConversion()
100
xmlFreeTextReader(_reader);
102
// xmlTextWriterEndDocument(_writer); //if it hasn't been called earlier
103
xmlFreeTextWriter(_writer);// was crashing
104
//xmlBufferFree(_buf);
107
///Called from each XML class during its construction
108
void XMLConversion::RegisterXMLFormat(XMLBaseFormat* pFormat, bool IsDefault, const char* uri)
110
if(IsDefault || Namespaces().empty())
113
Namespaces()[uri] = pFormat;
115
Namespaces()[pFormat->NamespaceURI()] = pFormat;
118
///Returns the extended form of the OBConversion object with an xml reader or writer,
119
/// if this has not already been done.
120
XMLConversion* XMLConversion::GetDerived(OBConversion* pConv, bool ForReading)
122
XMLConversion* pxmlConv;
123
if(!pConv->GetAuxConv())
124
//Need to make an extended copy. It will be deleted by pConv's destructor
125
pxmlConv = new XMLConversion(pConv);
128
//pConv has already had an extended copy made
129
pxmlConv = dynamic_cast<XMLConversion*>(pConv->GetAuxConv());
136
pxmlConv->SetupReader();
137
if(pConv->GetInStream()->tellg() < pxmlConv->_lastpos)
139
//Probably a new file; copy some member vars and renew the current reader
140
pxmlConv->InFilename = pConv->GetInFilename();
141
pxmlConv->pInFormat = pConv->GetInFormat();
143
if(xmlReaderNewIO( pxmlConv->_reader, ReadStream, NULL, pxmlConv, "", NULL, 0)==-1)
149
pxmlConv->SetupWriter();
150
pxmlConv->SetLast(pConv->IsLast()); //Copy IsLast flag to the extended object
156
bool XMLConversion::ReadXML(XMLBaseFormat* pFormat, OBBase* pOb)
160
//The initial stream position was not at the start, probably because of fastsearch
161
//Read and discard the first object to synchronize the reader,
162
//then continue getting the requested object.
163
//Assumes the objects are all at the same level in the DOM tree.
164
SetOneObjectOnly(); //probably already set
165
streampos SavedReqestedPos = _requestedpos;
166
_requestedpos=0;//don't do this again
167
ReadXML(pFormat,pOb);
168
GetInStream()->seekg(SavedReqestedPos);
173
while(GetInStream()->good() && (_SkipNextRead || (result=xmlTextReaderRead(_reader))==1)) //read may not be called
176
if(_LookingForNamespace)
178
const xmlChar* puri = xmlTextReaderConstNamespaceUri(_reader);
181
string uri((const char*)puri);
182
//Look up appropriate format class from the namespace URI
183
NsMapType::iterator nsiter;
184
nsiter = Namespaces().find(uri);
185
if(nsiter!=Namespaces().end())
187
XMLBaseFormat* pNewFormat = nsiter->second;
188
//Must have same target, e.g. OBMol, as current format
189
if(pNewFormat->GetType() == pFormat->GetType())
191
_LookingForNamespace=false;
193
SetInFormat(pNewFormat);
194
pNewFormat->ReadMolecule(pOb,this);
201
const xmlChar* pname = xmlTextReaderConstLocalName(_reader);
202
int typ = xmlTextReaderNodeType(_reader);
203
if(typ==XML_READER_TYPE_SIGNIFICANT_WHITESPACE || !pname)
204
continue; //Text nodes handled in format class
205
string ElName((const char*)pname);
207
//Pass the node on to the appropriate format class
209
if(typ==XML_READER_TYPE_ELEMENT)
210
ret= pFormat->DoElement(ElName);
211
else if(typ==XML_READER_TYPE_END_ELEMENT)
212
ret= pFormat->EndElement(ElName);
215
_lastpos = GetInStream()->tellg();
218
//derived format callback has stopped processing by returning false;
219
//leave reader intact so it can be continued to be used.
220
if(!IsOption("n",OBConversion::INOPTIONS))
222
_LookingForNamespace = true;
229
xmlError* perr = xmlGetLastError();
230
if(perr && perr->level!=XML_ERR_NONE)
232
obErrorLog.ThrowError("XML Parser " + GetInFilename(),
233
perr->message, obError);
236
GetInStream()->setstate(ios::eofbit);
239
return GetInStream()->good() && result!=0;
242
/////////////////////////////////////////////////////////
243
///Read and discard XML text up to the next occurrence of the tag, e.g. "/molecule>".
244
///This is left as the current node. Returns 1 on success, 0 if not found, -1 if failed.
245
int XMLConversion::SkipXML(const char* ctag)
248
tag.erase(--tag.end()); //remove >
249
int targettyp = XML_READER_TYPE_ELEMENT;
253
targettyp = XML_READER_TYPE_END_ELEMENT;
257
while((result = xmlTextReaderRead(_reader))==1)
259
if(xmlTextReaderNodeType(_reader)==targettyp
260
&& !xmlStrcmp(xmlTextReaderConstLocalName(_reader), BAD_CAST tag.c_str()))
265
/////////////////////////////////////////////////////////
266
string XMLConversion::GetAttribute(const char* attrname)
268
string AttributeValue;
269
xmlChar* pvalue = xmlTextReaderGetAttribute(_reader, BAD_CAST attrname);
272
AttributeValue = (const char*)pvalue;
275
return AttributeValue;
278
////////////////////////////////////////////////////////
279
string XMLConversion::GetContent()
281
xmlTextReaderRead(_reader);
282
const xmlChar* pvalue = xmlTextReaderConstValue(_reader);
283
string value((const char*)pvalue);
287
////////////////////////////////////////////////////////
288
bool XMLConversion::GetContentInt(int& value)
290
xmlTextReaderRead(_reader);
291
const xmlChar* pvalue = xmlTextReaderConstValue(_reader);
294
value = atoi((const char*)pvalue);
298
////////////////////////////////////////////////////////
299
bool XMLConversion::GetContentDouble(double& value)
301
xmlTextReaderRead(_reader);
302
const xmlChar* pvalue = xmlTextReaderConstValue(_reader);
305
value = strtod((const char*)pvalue,NULL);
309
////////////////////////////////////////////////////////
310
///Static callback function for xmlReaderForIO(). Reads up to the next '>', or len chars.
312
int XMLConversion::ReadStream(void * context, char * buffer, int len)
314
//TODO worry about non-ascii coding
315
XMLConversion* pConv = static_cast<XMLConversion*>(context);
316
istream* ifs = pConv->GetInStream();
317
if(!ifs->good() || ifs->eof())
86
cerr << "Error setting up xml writer\n" << endl;
90
int ret = xmlTextWriterSetIndent(_writer,1);
91
ret = xmlTextWriterSetIndentString(_writer, BAD_CAST " ");
95
XMLConversion::~XMLConversion()
98
xmlFreeTextReader(_reader);
100
// xmlFreeTextWriter(_writer); was crashing
101
//xmlBufferFree(_buf);
104
///Called from each XML class during its construction
105
void XMLConversion::RegisterXMLFormat(XMLBaseFormat* pFormat, bool IsDefault, const char* uri)
107
if(IsDefault || Namespaces().empty())
110
Namespaces()[uri] = pFormat;
112
Namespaces()[pFormat->NamespaceURI()] = pFormat;
115
XMLConversion* XMLConversion::GetDerived(OBConversion* pConv, bool ForReading)
117
XMLConversion* pxmlConv;
118
if(!pConv->GetAuxConv())
119
//Need to make an extended copy. It will be deleted by pConv's destructor
120
pxmlConv = new XMLConversion(pConv);
123
//pConv has already had an extended copy made
124
pxmlConv = dynamic_cast<XMLConversion*>(pConv->GetAuxConv());
131
pxmlConv->SetupReader();
132
if(pConv->GetInStream()->tellg() < pxmlConv->_lastpos)
134
//Probably a new file; copy some member vars and renew the current reader
135
pxmlConv->InFilename = pConv->GetInFilename();
136
pxmlConv->pInFormat = pConv->GetInFormat();
138
if(xmlReaderNewIO( pxmlConv->_reader, ReadStream, NULL, pxmlConv, "", NULL, 0)==-1)
143
pxmlConv->SetupWriter();
149
bool XMLConversion::ReadXML(XMLBaseFormat* pFormat, OBBase* pOb)
153
//The initial stream position was not at the start, probably because of fastsearch
154
//Read and discard the first object to synchronize the reader,
155
//then continue getting the requested object.
156
//Assumes the objects are all at the same level in the DOM tree.
157
SetOneObjectOnly(); //probably already set
158
streampos SavedReqestedPos = _requestedpos;
159
_requestedpos=0;//don't do this again
160
ReadXML(pFormat,pOb);
161
GetInStream()->seekg(SavedReqestedPos);
166
while(GetInStream()->good() &&
167
(_SkipNextRead || (result=xmlTextReaderRead(_reader))==1)) //read may not be called
170
if(_LookingForNamespace)
172
const xmlChar* puri = xmlTextReaderConstNamespaceUri(_reader);
175
string uri((const char*)puri);
176
//Look up appropriate format class from the namespace URI
177
NsMapType::iterator nsiter;
178
nsiter = Namespaces().find(uri);
179
if(nsiter!=Namespaces().end())
181
XMLBaseFormat* pNewFormat = nsiter->second;
182
//Must have same target, e.g. OBMol, as current format
183
if(pNewFormat->GetType() == pFormat->GetType())
185
_LookingForNamespace=false;
187
SetInFormat(pNewFormat);
188
pNewFormat->ReadMolecule(pOb,this);
195
const xmlChar* pname = xmlTextReaderConstLocalName(_reader);
196
int typ = xmlTextReaderNodeType(_reader);
197
if(typ==XML_READER_TYPE_SIGNIFICANT_WHITESPACE || !pname)
198
continue; //Text nodes handled in format class
199
string ElName((const char*)pname);
201
//Pass the node on to the appropriate format class
203
if(typ==XML_READER_TYPE_ELEMENT)
204
ret= pFormat->DoElement(ElName);
205
else if(typ==XML_READER_TYPE_END_ELEMENT)
206
ret= pFormat->EndElement(ElName);
208
_lastpos = GetInStream()->tellg();
211
//derived format callback has stopped processing by returning false;
212
//leave reader intact so it can be continued to be used.
213
if(!IsOption("n",OBConversion::INOPTIONS))
215
_LookingForNamespace = true;
222
cerr << "XML Parser failed in " << GetInFilename() << endl;
223
GetInStream()->setstate(ios::eofbit);
225
return (result==0);// was result==0;
228
/////////////////////////////////////////////////////////
229
string XMLConversion::GetAttribute(const char* attrname)
231
string AttributeValue;
232
const xmlChar* pvalue = xmlTextReaderGetAttribute(_reader, BAD_CAST attrname);
234
AttributeValue = (const char*)pvalue;
235
return AttributeValue;
238
////////////////////////////////////////////////////////
239
string XMLConversion::GetContent()
241
xmlTextReaderRead(_reader);
242
const xmlChar* pvalue = xmlTextReaderConstValue(_reader);
243
string value((const char*)pvalue);
247
////////////////////////////////////////////////////////
248
bool XMLConversion::GetContentInt(int& value)
250
xmlTextReaderRead(_reader);
251
const xmlChar* pvalue = xmlTextReaderConstValue(_reader);
254
value = atoi((const char*)pvalue);
258
////////////////////////////////////////////////////////
259
bool XMLConversion::GetContentDouble(double& value)
261
xmlTextReaderRead(_reader);
262
const xmlChar* pvalue = xmlTextReaderConstValue(_reader);
265
value = strtod((const char*)pvalue,NULL);
269
//**********************************************
270
/// Utility function to read an input stream until a specified string is found
271
streamsize gettomatch(istream& is, char* buf, streamsize count, const char* match)
273
//Reads chars from input stream into a buffer until either:
274
// count chars have been read or
275
// the string match has been input.
276
//The buffer is NOT terminated by a '\0' char.
277
//The number of characters stored in buf is returned.
280
char lastchar = EOF; //value if no valid match provided
283
matchlength = strlen(match);
284
lastchar = match[matchlength-1];
287
streambuf* prb = is.rdbuf();
296
const char* mptr = match + matchlength-2; //last char is already matched
297
const char* bptr = p-2;
298
while((*mptr-- == *bptr--) && (mptr >= match));
303
break;//have found match
309
//***********************************************
311
///Static callback function for xmlReaderForIO()
312
int XMLConversion::ReadStream(void * context, char * buffer, int len)
314
//Reads up to the next '>'
315
XMLConversion* pConv = static_cast<XMLConversion*>(context);
316
istream* ifs = pConv->GetInStream();
319
const char* endtag = NULL;
320
OBFormat* pFormat = pConv->GetInFormat();
321
XMLBaseFormat* pxmlFormat = static_cast<XMLBaseFormat*>(pFormat);
323
endtag = pxmlFormat->EndTag();
325
// static char* OrigBuffer;
327
// OrigBuffer = buffer;
329
return gettomatch(*ifs, buffer, len , endtag);//was + OrigBuffer - buffer
332
int XMLConversion::WriteStream(void * context, const char * buffer, int len)
334
XMLConversion* pxmlConv = static_cast<XMLConversion*>(context);
335
ostream* ofs = pxmlConv->GetOutStream();
336
ofs->write(buffer,len);
320
ifs->get(buffer, len+1, '>');
321
streamsize count = strlen(buffer);
327
buffer[++count] = '\0';
330
if (ifs->peek() == '\n' || ifs->peek() == '\r')
332
ifs->get(); // remove any trailing endlines
337
//////////////////////////////////////////////////////////
338
int XMLConversion::WriteStream(void * context, const char * buffer, int len)
340
XMLConversion* pxmlConv = static_cast<XMLConversion*>(context);
341
ostream* ofs = pxmlConv->GetOutStream();
342
if(len>0) //a call with len=0 coming from xmlFreeTextWriter
343
{ //called from destructor of XMLConversion was causing crash
344
ofs->write(buffer,len);
343
352
} //namespace OpenBabel
344
353
// http://xmlsoft.org/html/libxml-xmlreader.html