1
/**********************************************************************
2
Copyright (C) 2005 by Chris Morley
3
Some portions Copyright (C) 2006 by Geoffrey R. Hutchison
5
This file is part of the Open Babel project.
6
For more information, see <http://openbabel.sourceforge.net/>
8
This program is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation version 2 of the License.
12
This program is distributed in the hope that it will be useful,
13
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
GNU General Public License for more details.
16
***********************************************************************/
17
#include <openbabel/babelconfig.h>
19
#include <openbabel/math/matrix3x3.h>
20
#include <openbabel/kinetics.h>
21
#include <openbabel/atomclass.h>
22
#include <openbabel/xml.h>
24
#ifdef HAVE_SHARED_POINTER
25
#include <openbabel/reaction.h>
30
#pragma warning (disable : 4800)
42
// Format class for Chemical Markup Language (CML), derived from XMLMoleculeFormat.
// Declares the reader callbacks (DoElement/EndElement), the helpers that turn the
// collected attribute pairs into an OBMol, and the helpers used when writing CML.
class CMLFormat : public XMLMoleculeFormat
45
const char* CML1NamespaceURI()const{return "http://www.xml-cml.org/dtd/cml_1_0_1.dtd";}
46
const char* CML2NamespaceURI()const{return "http://www.xml-cml.org/schema/cml2/core";}
49
//Constructor used on startup which registers this format type ID
52
OBConversion::RegisterFormat("cml", this, "chemical/x-cml");
53
// Option registrations. "N" is registered with a parameter count of 1 (it takes the
// namespace prefix); "2" is the only one registered explicitly as an input option.
// NOTE(review): Description() lists an "A" option and WriteMolecule() queries it,
// but no RegisterOptionParam("A", ...) is visible here - confirm whether that is intended.
OBConversion::RegisterOptionParam("1", this);
54
OBConversion::RegisterOptionParam("a", this);
55
OBConversion::RegisterOptionParam("N", this, 1);
56
OBConversion::RegisterOptionParam("m", this);
57
OBConversion::RegisterOptionParam("x", this);
58
OBConversion::RegisterOptionParam("h", this);
59
OBConversion::RegisterOptionParam("c", this);
60
OBConversion::RegisterOptionParam("p", this);
61
OBConversion::RegisterOptionParam("2", this, 0, OBConversion::INOPTIONS);
63
XMLConversion::RegisterXMLFormat(this, true); //this is the default XML format
64
XMLConversion::RegisterXMLFormat(this, false,CML1NamespaceURI());//CML1 also
65
XMLConversion::RegisterXMLFormat(this, false,CML2NamespaceURI());//Old CML2 also
67
// Namespace URI used when writing (see the uri assignment in WriteMolecule)
virtual const char* NamespaceURI()const{return "http://www.xml-cml.org/schema";}
69
// Help text describing the format and its options
virtual const char* Description()
72
"Chemical Markup Language\n"
73
"XML format. This implementation uses libxml2.\n"
74
"Write options for CML: -x[flags] (e.g. -x1ac)\n"
75
" 1 output CML1 (rather than CML2)\n"
76
" a output array format for atoms and bonds\n"
77
" A write aromatic bonds as such, not Kekule form\n"
78
" h use hydrogenCount for all hydrogens\n"
79
" m output metadata\n"
80
" x omit XML and namespace declarations\n"
81
" c continuous output: no formatting\n"
82
" p output properties\n"
83
" N<prefix> add namespace prefix to elements\n\n"
84
"Input options, e.g. -a2\n"
85
" 2 input 2D rather than 3D coordinates if both provided\n\n";
88
virtual const char* SpecificationURL()
89
{return "http://www.xml-cml.org/";}
91
// Same MIME type string as passed to RegisterFormat above
virtual const char* GetMIMEType()
92
{ return "chemical/x-cml"; };
94
virtual unsigned int Flags()
96
return READXML | ZEROATOMSOK;
99
virtual bool WriteChemObject(OBConversion* pConv);
100
virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
102
// Reader callbacks: DoElement handles the start of an element, EndElement its end
virtual bool DoElement(const string& name);
103
virtual bool EndElement(const string& name);
104
virtual const char* EndTag(){ return "/molecule>"; };
106
// One inner vector of (attribute name, value) pairs per atom or bond
typedef vector< vector< pair<string,string> > > cmlArray;
107
bool TransferArray(cmlArray& arr);
108
bool TransferElement(cmlArray& arr);
111
bool DoMolWideData();
112
bool ParseFormula(string& formula, OBMol* pmol);
113
void ReadNasaThermo();
115
void MakeAtomIds(OBMol& mol, vector<string>& atomIDs);
116
void WriteFormula(OBMol mol); //passes copy of mol
117
void WriteMetadataList();
119
void WriteBondStereo(OBBond* pbond, vector<string>& atomIDs);
120
void WriteCrystal(OBMol& mol);
121
void WriteProperties(OBMol& mol, bool& propertyListWritten);
122
void WriteThermo(OBMol& mol, bool& propertyListWritten);
123
string GetMolID();//for error messages
124
bool WriteInChI(OBMol& mol);
125
bool WriteVibrationData(OBMol& mol);
126
bool WriteRotationData(OBMol& mol);
129
map<string,int> AtomMap; //key=atom id, value= ob atom index
132
vector< pair<string,string> > cmlBondOrAtom; //for cml1 only
133
// (name, value) entries for atomParity/bondStereo, consumed by DoMolWideData()
vector< pair<string,string> > molWideData;
134
bool inBondArray; //for cml1 only
138
string CurrentAtomID;
139
// Counters set in DoElement: CrystalScalarsNeeded is set to 6 on <crystal> and
// PropertyScalarsNeeded to 1 on <property>; they select how <scalar> content is used.
int CrystalScalarsNeeded, PropertyScalarsNeeded, TransformsNeeded;
140
vector<double> CrystalVals;
141
OBUnitCell* pUnitCell;
142
SpaceGroup _SpaceGroup;
143
string SpaceGroupName;
144
// title attribute of the current <property>, used as the name for a following <scalar>
string titleonproperty;
147
////////////////////////////////////////////////////////////
148
//Make an instance of the format class
// (per the constructor comment in the class, constructing it registers the format on startup)
149
CMLFormat theCMLFormat;
153
There are 4 CML styles: CML1, CML2, both with and without array forms.
154
All styles are converted into the same internal structure in AtomArray
155
and BondArray, which contain (attribute) name/value pairs for
156
each atom or bond. At the end of molecule this is analysed in DoAtoms()
157
and DoBonds() to construct an OBMol.
161
///////////////////////////////////////////////////////
162
// Called at the start of an XML element whose name is `name`.
// Dispatches on the element name and stores what it finds for later use:
// atom/bond attributes go to AtomArray/BondArray, atomParity/bondStereo data to
// molWideData, crystal scalars to CrystalVals, property scalars to OBPairData, etc.
// For an empty element (<element/>) EndElement(name) is called explicitly at the bottom.
bool CMLFormat::DoElement(const string& name)
164
//A linear search is good enough for <20 element names; commonest at start.
168
cmlBondOrAtom.clear();
169
int IsEmpty = xmlTextReaderIsEmptyElement(reader());
170
TransferElement(AtomArray);
171
if(IsEmpty==1) //have to push here because end atom may not be called
172
AtomArray.push_back(cmlBondOrAtom);
174
else if(name=="bond")
176
cmlBondOrAtom.clear();
177
int IsEmpty = xmlTextReaderIsEmptyElement(reader());
178
TransferElement(BondArray);
180
BondArray.push_back(cmlBondOrAtom);
182
else if(name=="molecule")
184
//Ignore atoms with "ref" attributes
// NOTE(review): this test sits in the "molecule" branch, so the comment above may
// mean molecules rather than atoms - confirm.
185
if(xmlTextReaderGetAttribute(reader(), BAD_CAST "ref"))
194
CrystalScalarsNeeded=0;
197
PropertyScalarsNeeded=0;
200
return true; //ignore if already inside a molecule
201
_pmol->BeginModify();
204
// The title, id and molID attributes are read in turn; ptitle is then used as the molecule title
const xmlChar* ptitle = xmlTextReaderGetAttribute(reader(), BAD_CAST "title");
206
ptitle = xmlTextReaderGetAttribute(reader(), BAD_CAST "id");
208
ptitle = xmlTextReaderGetAttribute(reader(), BAD_CAST "molID");//Marvin
210
_pmol->SetTitle((const char*)ptitle);
212
// ptitle is reused for the spinMultiplicity attribute
ptitle = xmlTextReaderGetAttribute(reader(), BAD_CAST "spinMultiplicity");
214
_pmol->SetTotalSpinMultiplicity(atoi((const char*)ptitle));
216
// NOTE(review): the free below is commented out, so strings returned by
// xmlTextReaderGetAttribute in this branch do not appear to be released.
// free((void*)ptitle);//libxml2 doc says "The string must be deallocated by the caller."
219
else if(name=="atomArray")
221
if(!inFormula) //do nothing when a child of <formula>
224
TransferArray(AtomArray);
227
else if(name=="bondArray")
230
TransferArray(BondArray);
232
else if(name=="atomParity" || name=="bondStereo")
234
//Save in molWideData:
235
//the content, the atomRefs4 attribute, and (for atomParity only) the centralAtom
236
string atrefs4("atomRefs4");
237
value = _pxmlConv->GetAttribute(atrefs4.c_str());
238
pair<string,string> atomrefdata(atrefs4,value);
240
// Advance the reader to get the element's text content
xmlTextReaderRead(reader());
241
const xmlChar* pvalue = xmlTextReaderConstValue(reader());
244
value = (const char*)pvalue;
246
// Push order matters: DoMolWideData() expects (name,content), then atomRefs4,
// then centralAtomOrBond.
pair<string,string> nameAndvalue(name,value);
247
molWideData.push_back(nameAndvalue);
248
molWideData.push_back(atomrefdata);
251
if(name=="atomParity")
252
ss << AtomArray.size()+1; //index of current atom
254
ss << BondArray.size(); //index of current bond
255
pair<string,string> atdata("centralAtomOrBond",ss.str());
256
molWideData.push_back(atdata);
259
else if(name=="name")
262
_pmol->SetTitle(_pxmlConv->GetContent().c_str());
264
else if(name=="formula")
266
if(!xmlTextReaderIsEmptyElement(reader()))
268
//Only concise form is currently supported
269
const xmlChar* pformula = xmlTextReaderGetAttribute(reader(), BAD_CAST "concise");
272
RawFormula = (const char*)pformula;
273
// free((void*)pformula);
276
else if(name=="crystal")
278
// The next six <scalar> elements are taken as the unit cell parameters
CrystalScalarsNeeded = 6;
280
else if(name=="scalar")
282
if(CrystalScalarsNeeded)
284
xmlTextReaderRead(reader());
285
const xmlChar* pvalue = xmlTextReaderConstValue(reader());
288
CrystalVals.push_back(atof((const char*)pvalue));
289
// Once the sixth value has arrived, build the unit cell and attach it to the molecule
if(--CrystalScalarsNeeded==0)
291
pUnitCell = new OBUnitCell;
292
pUnitCell->SetOrigin(fileformatInput);
293
pUnitCell->SetData(CrystalVals[0],CrystalVals[1],CrystalVals[2],
294
CrystalVals[3],CrystalVals[4],CrystalVals[5]);
295
_pmol->SetData(pUnitCell);
299
else if(PropertyScalarsNeeded)
301
//Reads OBPairData(like SDF properties). Name is in scalar title or id attribute
302
const xmlChar* pattr = xmlTextReaderGetAttribute(reader(), BAD_CAST "title");
304
pattr = xmlTextReaderGetAttribute(reader(), BAD_CAST "id");
308
attr = (const char*)pattr;
310
// otherwise the title saved from the enclosing <property> element is used
attr = titleonproperty;
311
// free((void*)pattr);//"The string must be deallocated by the caller."
313
xmlTextReaderRead(reader());
314
const xmlChar* pvalue = xmlTextReaderConstValue(reader());
315
if(pvalue && !attr.empty())
317
OBPairData *dp = new OBPairData;
318
dp->SetAttribute(attr);
319
string val((const char*)pvalue);
320
dp->SetValue(Trim(val));
321
dp->SetOrigin(fileformatInput);
324
PropertyScalarsNeeded=0;
327
else if(name=="symmetry")
329
const xmlChar* pname = xmlTextReaderGetAttribute(reader(), BAD_CAST "spaceGroup");
332
SpaceGroupName = (const char*)pname;
333
// free((void*)pname);
336
else if(name=="transform3")
338
xmlTextReaderRead(reader());
339
const xmlChar* ptransform = xmlTextReaderConstValue(reader());
342
string t = (const char*)ptransform;
343
_SpaceGroup.AddTransform(t);
344
// free((void*)ptransform);
347
else if(name=="property")
349
//***pattr need to be deleted***
350
const char* pattr = (const char*)xmlTextReaderGetAttribute(reader(), BAD_CAST "dictRef");
351
if(pattr && !strcmp(pattr,"Thermo_OldNasa"))
355
pattr = (const char*)xmlTextReaderGetAttribute(reader(), BAD_CAST "title");
357
titleonproperty = pattr;
359
titleonproperty.clear();
360
// Tells the "scalar" branch that the next <scalar> belongs to this property
PropertyScalarsNeeded = 1;
365
else if(name=="string" || name=="float" || name=="integer"
366
|| name=="coordinate3"|| name=="coordinate2")
368
// Note: this local `name` shadows the parameter; it holds the "builtin" attribute
string name = _pxmlConv->GetAttribute("builtin");
369
xmlTextReaderRead(reader());
370
const xmlChar* pvalue = xmlTextReaderConstValue(reader());
373
string value = (const char*)pvalue;
375
pair<string,string> nameAndvalue(name,value);
376
cmlBondOrAtom.push_back(nameAndvalue);
378
else if(name=="stringArray" || name=="floatArray" || name=="integerArray")
380
// Note: this local `name` shadows the parameter; it holds the "builtin" attribute
string name = _pxmlConv->GetAttribute("builtin");
381
// cmlArray& arr = (name=="atomRef1" || name=="atomRef2" || name=="order")
382
// ? BondArray : AtomArray;
383
cmlArray& arr = inBondArray ? BondArray : AtomArray;
385
xmlTextReaderRead(reader());
386
const xmlChar* pvalue = xmlTextReaderConstValue(reader());
389
string value = (const char*)pvalue;
391
// Split the content into items; item i is appended to entry i of the target array,
// which is grown first if it has fewer entries than there are items
vector<string> items;
392
tokenize(items,value);
393
if(arr.size()<items.size())
394
arr.resize(items.size());
396
for(i=0;i<items.size();++i)
398
pair<string,string> nameAndvalue(name,items[i]);
399
arr[i].push_back(nameAndvalue);
403
//The end element event would not be called for <element/>, so call it explicitly.
404
if(xmlTextReaderIsEmptyElement(reader())==1)
405
return EndElement(name);
410
//////////////////////////////////////////////////////
411
// Called at the end of an XML element whose name is `name` (and explicitly from
// DoElement for empty elements). For atom/bond the collected attribute pairs are
// pushed onto AtomArray/BondArray; for molecule the OBMol is finalised; for symmetry
// the space group is assigned to the unit cell.
bool CMLFormat::EndElement(const string& name)
415
//ok for cml1 but is not called at end of <atom.../>
416
AtomArray.push_back(cmlBondOrAtom);
418
else if(name=="bond")
420
BondArray.push_back(cmlBondOrAtom);
422
else if(name=="formula")
424
else if(name=="molecule")
430
//Use formula only if nothing else provided
431
if(_pmol->NumAtoms()==0 && !RawFormula.empty())
432
if(!ParseFormula(RawFormula, _pmol))
433
obErrorLog.ThrowError(_pmol->GetTitle(),"Error in formula", obError);
435
//ensure unbonded atoms are seen as such
436
if(_pmol->NumBonds()==0)
437
FOR_ATOMS_OF_MOL(a, *_pmol)
440
_pmol->AssignSpinMultiplicity();
442
// _embedlevel is decremented here, once per closing </molecule>
return (--_embedlevel>=0); //false to stop parsing if no further embedded mols
443
// return false;//means stop parsing
445
else if(name=="symmetry")
447
// Look the group up by the name read from the spaceGroup attribute; if that fails or
// does not match the transforms collected in _SpaceGroup, search by the transforms
const SpaceGroup *group = SpaceGroup::GetSpaceGroup(SpaceGroupName);
448
if ((!group || !(_SpaceGroup == *group)) && _SpaceGroup.IsValid())
449
group = SpaceGroup::Find(&_SpaceGroup);
451
// NOTE(review): pUnitCell is dereferenced below; confirm it is always set before a
// </symmetry> is seen (it is allocated in DoElement after six crystal scalars).
pUnitCell->SetSpaceGroup(group);
453
pUnitCell->SetSpaceGroup(SpaceGroupName);
458
/////////////////////////////////////////////////////////
460
///Interprets atoms from AtomArray and writes them to an OBMol
// Each entry of AtomArray is a list of (attribute name, value) pairs for one atom.
// A new OBAtom is created per entry and its id, element, coordinates, hydrogens,
// charge, spin, parity and isotope are set from those pairs.
461
bool CMLFormat::DoAtoms()
463
OBAtomClassData aclass;
464
int dim=0; //dimension of molecule
465
bool use2d = _pxmlConv->IsOption("2", OBConversion::INOPTIONS);
467
int nAtoms=_pmol->NumAtoms();//was 0
468
cmlArray::iterator AtomIter;
469
for(AtomIter=AtomArray.begin();AtomIter!=AtomArray.end();++AtomIter)
472
OBAtom* pAtom = _pmol->NewAtom();
477
// Which coordinate set has been chosen for this atom
bool using3=false, using2=false, usingFract=false;
479
vector<pair<string,string> >::iterator AttributeIter;
480
for(AttributeIter=AtomIter->begin();AttributeIter!=AtomIter->end();++AttributeIter)
482
// Both are references into the stored pair, so assigning to them modifies AtomArray
string& attrname = AttributeIter->first;
483
string& value = AttributeIter->second;
485
if(attrname=="id" || attrname=="atomId" || attrname=="atomID")//which one correct?
488
// A duplicate id only produces a warning; the map entry is then overwritten
if(AtomMap.count(value)>0)
489
obErrorLog.ThrowError(GetMolID(),"The atom id " + value + " is not unique", obWarning);
490
AtomMap[value] = nhvy;//nAtoms;
492
//If the id begins with "aa", "ab", etc, the number that follows is taken as an atom class
493
if(value[0]=='a' && value[1]>='a' && value[1]<='z')
494
aclass.Add(nAtoms, atoi(value.c_str()+2));
497
else if(attrname=="elementType")
500
atno=etab.GetAtomicNum(value.c_str(),iso);
501
pAtom->SetAtomicNum(atno);
503
pAtom->SetIsotope(iso);
507
//If more than one set of coordinates provided,
508
//prefer 3D over 2D over 3Dfractional,
509
//but if use2d is true, prefer 2D over 3D
510
else if((attrname=="x3" || attrname=="y3" || attrname=="z3" || attrname=="xyz3") && !use2d)
515
else if((attrname=="x2" || attrname=="y2" || attrname=="z2" || attrname=="xy2") && !using3)
520
else if(pUnitCell && !using3 && !using2
521
&& (attrname=="xFract" || attrname=="yFract" || attrname=="zFract"))
524
// && binds tighter than ||, so each test reads (using3 && x3) || (using2 && x2) || ...
if(using3 && attrname=="x3" || using2 && attrname=="x2" || usingFract && attrname=="xFract")
525
x=strtod(value.c_str(),NULL);
526
else if(using3 && attrname=="y3" || using2 && attrname=="y2" || usingFract && attrname=="yFract")
527
y=strtod(value.c_str(),NULL);
528
else if(using3 && attrname=="z3" || using2 && attrname=="z2" || usingFract && attrname=="zFract")
529
z=strtod(value.c_str(),NULL);
531
else if(using2 && attrname=="xy2")
534
tokenize(vals,value);
537
x=strtod(vals[0].c_str(),NULL);
538
y=strtod(vals[1].c_str(),NULL);
541
else if(using3 && attrname=="xyz3")
544
tokenize(vals,value);
547
x=strtod(vals[0].c_str(),NULL);
548
y=strtod(vals[1].c_str(),NULL);
549
z=strtod(vals[2].c_str(),NULL);
553
if(attrname=="hydrogenCount")
557
// Add the stated number of explicit hydrogens, each singly bonded to atom nhvy
for(i=0;i<atoi(value.c_str());++i)
559
OBAtom* hatom = _pmol->NewAtom();
560
hatom->SetAtomicNum(1);
562
_pmol->AddBond(nhvy,_pmol->NumAtoms(),1);
567
else if(attrname=="formalCharge")
568
pAtom->SetFormalCharge(atoi(value.c_str()));
570
else if(attrname=="spinMultiplicity")
571
pAtom->SetSpinMultiplicity(atoi(value.c_str()));
573
else if(attrname=="atomRefs4")//from atomParity element
577
// Have 4 atoms defining the parity
578
// but don't currently use them TODO
579
//Simply use parity as given to set clockwise/anticlockwise
581
// NOTE(review): the iterator is advanced without an end() check, and because attrname
// and value are references to the atomRefs4 pair, the two assignments below overwrite
// that stored pair with the next pair's contents - confirm this is acceptable.
attrname = (++AttributeIter)->first;
582
if(attrname=="parity")
584
value = AttributeIter->second;
585
int parity = atoi(value.c_str());
586
if(parity>0) pAtom->SetClockwiseStereo();
587
if(parity<0) pAtom->SetAntiClockwiseStereo();
591
else if(attrname=="radical") //Marvin extension
594
if(value=="monovalent")
596
else if(value=="divalent")
598
else if(value=="divalent3")
600
else if(value=="divalent1")
602
pAtom->SetSpinMultiplicity(spin);
604
else if(attrname=="isotopeNumber" || attrname=="isotope")
605
pAtom->SetIsotope(atoi(value.c_str()));
609
//Save atom coordinates
610
if(using3 || usingFract)
621
//Coordinates are fractional
624
// Multiply by the unit cell's ortho matrix (fractional -> Cartesian, per the comment above)
v *= pUnitCell->GetOrthoMatrix();
628
pAtom->SetVector(x, y, z);
632
// A copy of the collected atom class data is attached to the molecule
_pmol->SetData((new OBAtomClassData(aclass)));
634
_pmol->SetDimension(dim);
637
/////////////////////////////////////////////////////////////////////
639
///Interprets bonds from BondArray and writes them to an OBMol
// Each entry of BondArray is a list of (attribute name, value) pairs for one bond.
// Atom ids are translated to atom indices through AtomMap and the bond is added
// with _pmol->AddBond().
640
bool CMLFormat::DoBonds()
642
vector<pair<string,string> >::iterator AttributeIter;
643
cmlArray::iterator BondIter;
644
// Used with the "not legal CML" warning below (tested as !HaveWarned)
bool HaveWarned = false;
645
for(BondIter=BondArray.begin();BondIter!=BondArray.end();++BondIter)
647
int indx1=0,indx2=0, ord=0;
648
string bondstereo, BondStereoRefs;
649
bool PossibleBond = false;
651
for(AttributeIter=BondIter->begin();AttributeIter!=BondIter->end();++AttributeIter)
653
// Copies here (unlike DoAtoms, which takes references)
string attrname = AttributeIter->first;
654
string value = AttributeIter->second;
658
// Matches any attribute whose name starts with "atomRef" (atomRef1, atomRefs2, ...)
if(attrname.compare(0, 7, "atomRef")==0) //generic
661
string::size_type pos = value.find(' ');
663
if(!HaveWarned && (attrname=="atomRefs1"
664
|| (attrname=="atomRefs2" && pos==string::npos)))
666
obErrorLog.ThrowError(GetMolID(),
667
attrname + " is not legal CML in this context, "
668
"but OpenBabel will attempt to understand what was meant.", obWarning);
674
// A space means both atom ids are in the one value: "id1 id2"
if(pos!=string::npos)
676
// AtomMap is a std::map, so operator[] inserts 0 for an unknown id; the
// indx<=0 check further down then reports it as an error
indx1 = AtomMap[value.substr(0,pos)];
677
string temp =value.substr(pos+1);
678
indx2 = AtomMap[Trim(temp)];
679
//C4239 indx2 = AtomMap[Trim(value.substr(pos+1))];
683
indx1 = AtomMap[value];
688
indx2 = AtomMap[value];
690
indx1=-1; //forces error
693
else if(attrname=="order")
695
const char bo = value[0];
704
ord = strtol(value.c_str(), &endptr, 10);
711
if(indx1<=0 || indx2<=0)
713
obErrorLog.ThrowError(GetMolID(),"Incorrect bond attributes", obError);
716
if(ord==0) //Bonds are single if order is not specified
718
_pmol->AddBond(indx1,indx2,ord,0);
725
/////////////////////////////////////////////////////////////////
727
// Applies the atomParity and bondStereo information saved in molWideData.
// DoElement pushes three consecutive entries per element: (element name, content),
// ("atomRefs4", refs) and ("centralAtomOrBond", index); this routine reads them back
// in that order.
bool CMLFormat::DoMolWideData()
729
//Handle atomParity and bondStereo
730
vector<pair<string,string> >::iterator AttributeIter;
731
for(AttributeIter=molWideData.begin();AttributeIter!=molWideData.end();++AttributeIter)
733
string name = AttributeIter->first;
734
string value = AttributeIter->second;
736
if(name=="atomParity" || name=="bondStereo")
738
vector<unsigned int> AtomRefIdx;
740
// NOTE(review): the iterator is advanced here and again below without an end() check;
// this relies on the three-entry layout described at the top.
string nextname = (++AttributeIter)->first;
741
string atrefsvalue = AttributeIter->second;
742
if(nextname=="atomRefs4" && !atrefsvalue.empty())
745
tokenize(ids, atrefsvalue);
748
// Translate each atom id to its index (an unknown id yields 0 from map operator[])
AtomRefIdx.push_back(AtomMap[ids[i]]);
751
nextname = (++AttributeIter)->first;
752
if(!(nextname=="centralAtomOrBond"))
755
// Index of the atom (for atomParity) or bond (for bondStereo) the data applies to
int Idx = atoi(AttributeIter->second.c_str());
756
if(name=="atomParity")
758
int parity =atoi(value.c_str());
759
//We now have for the parity for the atom of index AtIdx
760
//calculated using the atoms in AtomRefIdx.
761
//Need now to adjust the parity to match the standard order
763
OBAtom* patom = _pmol->GetAtom(Idx);
767
patom->SetClockwiseStereo();
769
patom->SetAntiClockwiseStereo();
770
OBChiralData* cd = new OBChiralData;
772
cd->SetAtom4Refs(AtomRefIdx, input);
780
if(atrefsvalue.empty())
782
OBBond* pDBond = _pmol->GetBond(Idx);
783
//With no atomRefs4, the specification is either W, H,
794
// ... or ordinary cis/trans
795
if(value!="C" && value!="T")
797
//which is valid only with one substituent on each C
799
// Look on each end of the double bond for a non-hydrogen neighbour other than the
// opposite end, and take the bond to it
OBAtom* pAt1 = pDBond->GetBeginAtom();
800
OBAtom* pAt2 = pDBond->GetEndAtom();
801
FOR_NBORS_OF_ATOM(a1,pAt1)
803
if(!a1->IsHydrogen() && &*a1!=pAt2)
805
pbond1 = _pmol->GetBond(pAt1->GetIdx(),a1->GetIdx());
808
FOR_NBORS_OF_ATOM(a2,pAt2)
810
if(!a2->IsHydrogen() && &*a2!=pAt1)
812
pbond2 = _pmol->GetBond(pAt2->GetIdx(),a2->GetIdx());
817
// With atomRefs4 given, the two substituent bonds are refs 0-1 and refs 2-3
pbond1 = _pmol->GetBond(AtomRefIdx[0],AtomRefIdx[1]);
818
pbond2 = _pmol->GetBond(AtomRefIdx[2],AtomRefIdx[3]);
821
if(!pbond1 || !pbond2)
823
//Conjugated double bonds are a special case see OBMol2Smi::GetCisTransBondSymbol()
824
//Feb07 C/C=C/C=C/C=C/C trans/trans/trans has OB_TORUP_BOND and OB_TORDOWN in OBMol as
826
if(pbond1->IsUp() || pbond1->IsDown())
828
if((pbond1->IsUp() && (value=="T")) || (pbond1->IsDown() && value=="C"))
834
/* else if(pbond2->IsUp() || pbond2->IsDown()) //congugated double bonds
836
if((pbond2->IsUp() && (value=="T")) || (pbond2->IsDown() && value=="C"))
851
//Need to mark direction of the other bond also, in case
852
// it is part of a conjugated chain (when u/d is reversed see above)
853
OBAtom* pAtom2 = _pmol->GetAtom(AtomRefIdx[2]); //end of double bond
854
FOR_BONDS_OF_ATOM(b, pAtom2)
856
if(&*b==pbond2 || b->IsDouble()) continue;
857
if((b->GetNbrAtom(pAtom2))->GetAtomicNum()==6)
869
//Clear here to aid embedded molecules
877
//////////////////////////////////////////////////////////
878
// Copies the array-form attributes of the current XML node into `arr`
// (AtomArray or BondArray at the call sites in DoElement), one item per entry.
bool CMLFormat::TransferArray(cmlArray& arr)
880
//Reads attributes of the current node, e.g. atomID="a1 a2 a3"
881
//parses each of them into their separate items, e.g. a1, a2, a3
882
//and pushes them as a pairs in each of the members of the array
883
// e.g. ("atomID", "a1") in AtomArray[0], ("atomID", "a2") in AtomArray[1]
885
if(xmlTextReaderHasAttributes(reader()))
887
// Walk the attributes with MoveToFirstAttribute / MoveToNextAttribute
int ret = xmlTextReaderMoveToFirstAttribute(reader());
890
const xmlChar* pname = xmlTextReaderConstName(reader());
891
string name((const char*)pname);
892
const xmlChar* pvalue = xmlTextReaderConstValue(reader());
895
value = (const char*)pvalue;
896
vector<string> items;
897
tokenize(items,value);
898
// Grow the array if this attribute has more items than there are entries so far
if(arr.size()<items.size())
899
arr.resize(items.size());
901
for(i=0;i<items.size();++i)
903
pair<string,string> nameAndvalue(name,items[i]);
904
arr[i].push_back(nameAndvalue);
906
ret = xmlTextReaderMoveToNextAttribute(reader());
912
// Copies every attribute of the current XML node, as a (name, value) pair, into
// cmlBondOrAtom.
// NOTE(review): in the lines visible here the pairs go to the member cmlBondOrAtom
// and the `arr` parameter is not referenced - confirm whether `arr` is still needed.
bool CMLFormat::TransferElement(cmlArray& arr)
914
//Reads the attributes of the current node, e.g. <atom id="a1" elementType="C"/>
915
//pushes each of them as a pairs into each of the members of the array
916
// e.g. ("id", "a1") and (elementType", "C") will be put into AtomArray[n]
917
//where n is the number of times this routine has been called before.
919
if(xmlTextReaderHasAttributes(reader()))
921
// Walk the attributes with MoveToFirstAttribute / MoveToNextAttribute
int ret = xmlTextReaderMoveToFirstAttribute(reader());
924
const xmlChar* pname = xmlTextReaderConstName(reader());
925
string name((const char*)pname);
926
const xmlChar* pvalue = xmlTextReaderConstValue(reader());
930
value = (const char*)pvalue;
933
pair<string,string> nameAndvalue(name,value);
934
cmlBondOrAtom.push_back(nameAndvalue);
935
ret = xmlTextReaderMoveToNextAttribute(reader());
941
// Adds atoms to pmol from a formula string (EndElement passes RawFormula, which
// DoElement reads from the "concise" attribute of <formula>).
// The string is tokenized and walked with two iterators, one for the element symbol
// and one for its count - presumably alternating "symbol count" items; confirm the format.
// EndElement treats a false return as "Error in formula".
bool CMLFormat::ParseFormula(string& formula, OBMol* pmol)
943
vector<string> items;
944
tokenize(items, formula);
945
vector<string>::iterator iSymbol, iNumber;
946
for(iSymbol=items.begin();iSymbol!=items.end();++iSymbol)
949
// A symbol with no item after it
if(iNumber==items.end())
951
int n=atoi(iNumber->c_str());
953
// iSymbol is post-incremented here, in addition to the ++iSymbol of the for loop
atno=etab.GetAtomicNum(iSymbol++->c_str(),iso);
959
OBAtom* pAtom = pmol->NewAtom();
961
pAtom->SetAtomicNum(atno);
963
pAtom->SetIsotope(iso);
969
// Reads NASA thermo data from the XML reader into a new OBNasaThermoData.
// Nodes are read one after another; for each one the dictRef attribute selects
// which value (NasaLowT, NasaHighT, NasaMidT or NasaCoeffs) the content sets.
void CMLFormat::ReadNasaThermo()
971
//Do all NasaThermo data here
972
OBNasaThermoData* pTD = new OBNasaThermoData;
973
pTD->SetOrigin(fileformatInput);
977
xmlTextReaderRead(reader());
978
int typ = xmlTextReaderNodeType(reader());
979
if(typ==XML_READER_TYPE_SIGNIFICANT_WHITESPACE)
981
const char* pname = (const char*)xmlTextReaderConstLocalName(reader());
982
if(typ==XML_READER_TYPE_END_ELEMENT)
984
if(!strcmp(pname,"property"))//end of element
989
// NOTE(review): xmlTextReaderGetAttribute returns a string the caller must free, and
// it may be NULL when the attribute is absent; confirm pattr is checked before the
// strcmp calls below and released afterwards.
const char * pattr = (const char*)xmlTextReaderGetAttribute(reader(), BAD_CAST "dictRef");
990
xmlTextReaderRead(reader());
991
const char* pvalue = (const char*)xmlTextReaderConstValue(reader());
994
if(!strcmp(pattr,"NasaLowT"))
995
pTD->SetLoT(atof(pvalue));
996
else if(!strcmp(pattr,"NasaHighT"))
997
pTD->SetHiT(atof(pvalue));
998
else if(!strcmp(pattr,"NasaMidT"))
999
pTD->SetMidT(atof(pvalue));
1000
else if(!strcmp(pattr,"NasaCoeffs"))
1002
vector<string> vals;
1003
tokenize(vals, pvalue);
1004
// NOTE(review): exactly 14 coefficients are read; vals[i] is not bounds-checked here,
// so fewer than 14 tokens would index past the end of vals - confirm input is validated.
for(int i=0;i<14;++i)
1005
pTD->SetCoeff(i, atof(vals[i].c_str()));
1012
// Writes a <metadataList title="generated by OpenBabel"> element containing six
// <metadata name="..." content="..."/> children: dc:creator, dc:description, dc:type,
// dc:contributor, dc:date and cmlm:structure.
void CMLFormat::WriteMetadataList()
1014
static const xmlChar C_METADATALIST[] = "metadataList";
1015
static const xmlChar C_METADATA[] = "metadata";
1016
static const xmlChar C_TITLE[] = "title";
1017
static const xmlChar C_NAME[] = "name";
1018
static const xmlChar C_CONTENT[] = "content";
1020
xmlTextWriterStartElement(writer(), C_METADATALIST);
1021
xmlTextWriterWriteAttribute(writer(), C_TITLE, BAD_CAST "generated by OpenBabel");
1023
// dc:creator - "OpenBabel version " followed by BABEL_VERSION
xmlTextWriterStartElement(writer(), C_METADATA);
1024
xmlTextWriterWriteAttribute(writer(), C_NAME, BAD_CAST "dc:creator");
1025
string version("OpenBabel version ");
1026
version += BABEL_VERSION;
1027
xmlTextWriterWriteAttribute(writer(), C_CONTENT, BAD_CAST version.c_str());
1028
xmlTextWriterEndElement(writer());
1030
// dc:description - fixed text
xmlTextWriterStartElement(writer(), C_METADATA);
1031
xmlTextWriterWriteAttribute(writer(), C_NAME, BAD_CAST "dc:description");
1032
xmlTextWriterWriteAttribute(writer(), C_CONTENT, BAD_CAST "Conversion of legacy filetype to CML");
1033
xmlTextWriterEndElement(writer());
1035
// dc:type - fixed text
xmlTextWriterStartElement(writer(), C_METADATA);
1036
xmlTextWriterWriteAttribute(writer(), C_NAME, BAD_CAST "dc:type");
1037
xmlTextWriterWriteAttribute(writer(), C_CONTENT, BAD_CAST "chemistry");
1038
xmlTextWriterEndElement(writer());
1040
// dc:contributor - always written as "unknown"
xmlTextWriterStartElement(writer(), C_METADATA);
1041
xmlTextWriterWriteAttribute(writer(), C_NAME, BAD_CAST "dc:contributor");
1042
xmlTextWriterWriteAttribute(writer(), C_CONTENT, BAD_CAST "unknown");
1043
xmlTextWriterEndElement(writer());
1045
// dc:date - the string produced by getTimestr()
xmlTextWriterStartElement(writer(), C_METADATA);
1046
xmlTextWriterWriteAttribute(writer(), C_NAME, BAD_CAST "dc:date");
1047
xmlTextWriterWriteAttribute(writer(), C_CONTENT, BAD_CAST getTimestr().c_str());
1048
xmlTextWriterEndElement(writer());
1050
// cmlm:structure - always written as "yes"
xmlTextWriterStartElement(writer(), C_METADATA);
1051
xmlTextWriterWriteAttribute(writer(), C_NAME, BAD_CAST "cmlm:structure");
1052
xmlTextWriterWriteAttribute(writer(), C_CONTENT, BAD_CAST "yes");
1053
xmlTextWriterEndElement(writer());
1055
// Closes <metadataList>
xmlTextWriterEndElement(writer());
1058
// Formats the current system time, converted with localtime(), into the timestr
// buffer using strftime with the format "%a %b %d %H:%M:%S %Z %Y".
// WriteMetadataList() uses the returned string as the dc:date content.
string CMLFormat::getTimestr()
1060
const int TIME_STR_SIZE = 64;
1061
time_t akttime; /* Systemtime */
1062
// Buffer is one byte larger than TIME_STR_SIZE and starts out as an empty string
char timestr[TIME_STR_SIZE + 1] = ""; /* Timestring */
1063
size_t time_res; /* Result of strftime */
1065
/* ---- Get the system-time ---- */
1066
akttime = time((time_t *) NULL);
1067
time_res = strftime(timestr,
1069
"%a %b %d %H:%M:%S %Z %Y",
1070
localtime((time_t *) &akttime)
1075
/////////////////////////////////////////////////////////////
1077
bool CMLFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
1079
static const xmlChar C_MOLECULE[] = "molecule";
1080
static const xmlChar C_CML[] = "cml";
1081
static const xmlChar C_ATOMARRAY[] = "atomArray";
1082
static const xmlChar C_BONDARRAY[] = "bondArray";
1083
static const xmlChar C_ATOM[] = "atom";
1084
static const xmlChar C_BOND[] = "bond";
1085
static const xmlChar C_ID[] = "id";
1086
// static const xmlChar C_TITLE[] = "title";
1087
static const xmlChar C_NAME[] = "name";
1088
static const xmlChar C_ATOMPARITY[] = "atomParity";
1089
// static const xmlChar C_BONDSTEREO[] = "bondStereo";
1091
static const xmlChar C_X2[] = "x2";
1092
static const xmlChar C_Y2[] = "y2";
1093
static const xmlChar C_X3[] = "x3";
1094
static const xmlChar C_Y3[] = "y3";
1095
static const xmlChar C_Z3[] = "z3";
1096
static const xmlChar C_XFRACT[] = "xFract";
1097
static const xmlChar C_YFRACT[] = "yFract";
1098
static const xmlChar C_ZFRACT[] = "zFract";
1099
static const xmlChar C_ATOMID[] = "atomID";
1100
static const xmlChar C_ELEMENTTYPE[] = "elementType";
1101
static const xmlChar C_ISOTOPE[] = "isotope";
1102
static const xmlChar C_SPINMULTIPLICITY[] = "spinMultiplicity";
1103
static const xmlChar C_HYDROGENCOUNT[] = "hydrogenCount";
1104
static const xmlChar C_FORMALCHARGE[] = "formalCharge";
1105
static const xmlChar C_ATOMREFS2[] = "atomRefs2";
1106
static const xmlChar C_ATOMREF1[] = "atomRef1";
1107
static const xmlChar C_ATOMREF2[] = "atomRef2";
1108
static const xmlChar C_ORDER[] = "order";
1109
static const xmlChar C_ATOMREFS4[] = "atomRefs4";
1110
/* defined in other functions
1111
static const xmlChar C_FORMULA[] = "formula";
1112
static const xmlChar C_CONCISE[] = "concise";
1113
static const xmlChar C_PROPERTYLIST[] = "propertyList";
1114
static const xmlChar C_PROPERTY[] = "property";
1115
static const xmlChar C_SCALAR[] = "scalar";
1118
static const xmlChar C_STRING[] = "string";
1119
static const xmlChar C_INTEGER[] = "integer";
1120
static const xmlChar C_FLOAT[] = "floatg";
1121
static const xmlChar C_BUILTIN[] = "builtin";
1122
static const xmlChar C_STRINGARRAY[] = "stringArray";
1123
static const xmlChar C_INTEGERARRAY[] = "integerArray";
1124
static const xmlChar C_FLOATARRAY[] = "floatArray";
1125
/* used as ordinary text
1129
const xmlChar* C_X3orFRACT = C_X3; //Non-fraction coordinates are the default
1130
const xmlChar* C_Y3orFRACT = C_Y3;
1131
const xmlChar* C_Z3orFRACT = C_Z3;
1133
_pxmlConv = XMLConversion::GetDerived(pConv,false);
1137
bool cml1 = _pxmlConv->IsOption("1");
1138
bool arrayform = _pxmlConv->IsOption("a");
1139
bool WriteAromaticBonds = _pxmlConv->IsOption("A");
1140
prefix = BAD_CAST _pxmlConv->IsOption("N");
1143
//Write the header on the first object (incl OBReaction)
1144
//unless x option set or if has been called from elsewhere (e.g. CMLReact)
1145
if(!_pxmlConv->IsOption("MolsNotStandalone") && _pxmlConv->GetOutputIndex()==1)
1147
if(!_pxmlConv->IsOption("x"))
1149
xmlTextWriterStartDocument(writer(), NULL, NULL, NULL);
1151
uri = BAD_CAST CML1NamespaceURI();
1153
uri=BAD_CAST NamespaceURI();// not the old CML2NamespaceURI();
1155
//If more than one molecule to be output, write <cml> at start and </cml> at end.
1156
if(!_pxmlConv->IsLast())
1158
xmlTextWriterStartElementNS(writer(), prefix, C_CML, uri);
1163
OBMol* pmol = dynamic_cast<OBMol*>(pOb);
1166
#ifdef HAVE_SHARED_POINTER
1167
OBReaction* pReact = dynamic_cast<OBReaction*>(pOb);
1170
//Use CMLReact to convert OBReaction object
1171
OBFormat* pCMLRFormat = pConv->FindFormat("cmlr");
1174
obErrorLog.ThrowError(__FUNCTION__, "Cannot find CMLReact format", obError);
1177
//Disable list option and suppress topping and tailing in CMLReactFormat.
1178
_pxmlConv->AddOption("l", OBConversion::OUTOPTIONS);
1179
_pxmlConv->AddOption("ReactionsNotStandalone", OBConversion::OUTOPTIONS);
1180
bool ret = pCMLRFormat->WriteMolecule(pOb,_pxmlConv);
1181
_pxmlConv->RemoveOption("ReactionsNotStandalone", OBConversion::OUTOPTIONS);
1191
int numbonds = mol.NumBonds(); //Capture this before deleting Hs
1192
bool UseHydrogenCount=false;
1193
if(_pxmlConv->IsOption("h"))
1195
pmol->DeleteHydrogens();
1196
UseHydrogenCount=true;
1199
bool UseFormulaWithNoBonds=true;
1201
int dim = mol.GetDimension();
1204
xmlTextWriterStartElementNS(writer(), prefix, C_MOLECULE, uri);
1206
const char* id = mol.GetTitle();
1210
//If name is a filename with a path, remove path and extension
1211
string::size_type pos;
1212
pos = name.find_last_of("/\\:");
1213
if(pos!=string::npos)
1215
name.erase(0, pos+1);
1216
pos = name.rfind('.');
1217
if(pos!=string::npos)
1221
if(!isalpha(name[0])) //since ids have to start with a letter, add "id" to those that don't...
1223
xmlTextWriterWriteAttribute(writer(), C_ID, BAD_CAST name.c_str());
1224
if(!isalpha(*id)) //...and write <name> orig title </name>
1226
xmlTextWriterStartElementNS(writer(), prefix, C_NAME, NULL);
1227
xmlTextWriterWriteFormatString(writer(),"%s", id);
1228
xmlTextWriterEndElement(writer());//name
1232
//spinMultiplicity is written as an attribute of <molecule> only when it is not 1 and the molecule has bonds
1233
int smult = mol.GetTotalSpinMultiplicity();
1234
if(smult!=1 && numbonds!=0)
1235
xmlTextWriterWriteFormatAttribute(writer(), C_SPINMULTIPLICITY,"%d", smult);
1237
if(_pxmlConv->IsOption("m") && _pxmlConv->GetOutputIndex()==1) //only on first molecule
1238
WriteMetadataList();
1241
if (!cml1 && mol.HasData(OBGenericDataType::UnitCell))
1243
WriteCrystal(mol);//Output will be in crystallographic form
1244
UseFormulaWithNoBonds = false;
1249
vector<string> atomIds;
1251
if(mol.NumAtoms()>0)
1253
//if the molecule has no bonds and its atoms don't have coordinates, just output the formula
1254
if(numbonds==0 && UseFormulaWithNoBonds && !mol.Has2D())
1258
xmlTextWriterStartElementNS(writer(), prefix, C_ATOMARRAY, NULL);
1260
MakeAtomIds(mol, atomIds);//Pre-construct to take into account atom class data
1263
stringstream id, eltyp, iso, chg, spn, hct, x, y, z;
1265
strstream id, eltyp, iso, chg, spn, hct, x, y, z;
1267
bool anyChg=false, anySpin=false, anyIsotope=false;
1268
double X, Y, Z; //atom coordinates
1270
if(mol.GetDimension()!=3)
1271
mol.FindChiralCenters();
1274
vector<OBAtom*>::iterator i;
1275
for (patom = mol.BeginAtom(i);patom;patom = mol.NextAtom(i))
1277
string el(etab.GetSymbol(patom->GetAtomicNum()));
1281
int charge = patom->GetFormalCharge();
1282
int spin = patom->GetSpinMultiplicity();
1283
int isotope =patom->GetIsotope();
1285
int hcount=patom->ImplicitHydrogenCount() + patom->ExplicitHydrogenCount(); //includes H isotopes
1292
//Convert to fractional coordinates
1293
vector3 v = patom->GetVector();
1294
v *= pUnitCell->GetFractionalMatrix();
1298
C_X3orFRACT = C_XFRACT;
1299
C_Y3orFRACT = C_YFRACT;
1300
C_Z3orFRACT = C_ZFRACT;
1301
dim=3; //should already be, but make sure
1312
id << " " << atomIds[patom->GetIdx()];
1314
iso << " " << isotope;
1315
chg << " " << charge;
1317
hct << " " << hcount;
1326
xmlTextWriterStartElementNS(writer(), prefix, C_ATOM, NULL);
1327
xmlTextWriterWriteFormatAttribute(writer(), C_ID,"%s", atomIds[patom->GetIdx()].c_str());
1331
xmlTextWriterWriteFormatAttribute(writer(), C_ELEMENTTYPE,"%s", el.c_str());
1333
xmlTextWriterWriteFormatAttribute(writer(), C_ISOTOPE,"%d", isotope);
1336
xmlTextWriterWriteFormatAttribute(writer(), C_FORMALCHARGE,"%d", charge);
1339
xmlTextWriterWriteFormatAttribute(writer(), C_SPINMULTIPLICITY,"%d", spin);
1341
if(UseHydrogenCount && hcount)
1342
xmlTextWriterWriteFormatAttribute(writer(), C_HYDROGENCOUNT,"%d", hcount);
1346
xmlTextWriterWriteFormatAttribute(writer(), C_X2,"%f", X);
1347
xmlTextWriterWriteFormatAttribute(writer(), C_Y2,"%f", Y);
1351
xmlTextWriterWriteFormatAttribute(writer(), C_X3orFRACT,"%f", X);
1352
xmlTextWriterWriteFormatAttribute(writer(), C_Y3orFRACT,"%f", Y);
1353
xmlTextWriterWriteFormatAttribute(writer(), C_Z3orFRACT,"%f", Z);
1356
if((patom->IsPositiveStereo() || patom->IsClockwise()))
1358
else if(patom->IsNegativeStereo() || patom->IsAntiClockwise())
1362
OBChiralData* cd=(OBChiralData*)patom->GetData(OBGenericDataType::ChiralData);
1365
//UseAtom4Refs from OBChiralData
1366
vector<unsigned int> ref = cd->GetAtom4Refs(input);
1367
while (ref.size()<4)
1368
ref.push_back(patom->GetIdx());
1369
xmlTextWriterStartElementNS(writer(), prefix, C_ATOMPARITY, NULL);
1370
xmlTextWriterWriteFormatAttribute(writer(), C_ATOMREFS4, "%s %s %s %s",
1371
// "a%d a%d a%d a%d", ref[0], ref[1], ref[2], ref[3]);
1372
atomIds[ref[0]].c_str(), atomIds[ref[1]].c_str(),
1373
atomIds[ref[2]].c_str(), atomIds[ref[3]].c_str());
1374
xmlTextWriterWriteFormatString(writer(),"%d", cfg);
1375
xmlTextWriterEndElement(writer());//atomParity
1382
xmlTextWriterStartElementNS(writer(), prefix, C_STRING, NULL);
1383
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "elementType");
1384
xmlTextWriterWriteFormatString(writer(),"%s", el.c_str());
1385
xmlTextWriterEndElement(writer());
1389
xmlTextWriterStartElementNS(writer(), prefix, C_INTEGER, NULL);
1390
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "formalCharge");
1391
xmlTextWriterWriteFormatString(writer(),"%d", charge);
1392
xmlTextWriterEndElement(writer());
1395
if(UseHydrogenCount && hcount)
1397
xmlTextWriterStartElementNS(writer(), prefix, C_INTEGER, NULL);
1398
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "hydrogenCount");
1399
xmlTextWriterWriteFormatString(writer(),"%d", hcount);
1400
xmlTextWriterEndElement(writer());
1403
if(dim==2 || dim==3)
1405
xmlTextWriterStartElementNS(writer(), prefix, C_FLOAT, NULL);
1406
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s%d", "x",dim);
1407
xmlTextWriterWriteFormatString(writer(),"%f", X);
1408
xmlTextWriterEndElement(writer());
1410
xmlTextWriterStartElementNS(writer(), prefix, C_FLOAT, NULL);
1411
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s%d", "y",dim);
1412
xmlTextWriterWriteFormatString(writer(),"%f", Y);
1413
xmlTextWriterEndElement(writer());
1418
xmlTextWriterStartElementNS(writer(), prefix, C_FLOAT, NULL);
1419
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s%d", "z",dim);
1420
xmlTextWriterWriteFormatString(writer(),"%f", Z);
1421
xmlTextWriterEndElement(writer());
1423
//Stereochemistry currently not written for CML1
1425
xmlTextWriterEndElement(writer());//atom
1433
xmlTextWriterWriteFormatAttribute(writer(), C_ATOMID,"%s", id.str().c_str());
1434
xmlTextWriterWriteFormatAttribute(writer(), C_ELEMENTTYPE,"%s", eltyp.str().c_str());
1437
xmlTextWriterWriteFormatAttribute(writer(), C_ISOTOPE,"%s", iso.str().c_str());
1440
xmlTextWriterWriteFormatAttribute(writer(), C_FORMALCHARGE,"%s", chg.str().c_str());
1443
xmlTextWriterWriteFormatAttribute(writer(), C_SPINMULTIPLICITY,"%s", spn.str().c_str());
1445
if(UseHydrogenCount)
1446
xmlTextWriterWriteFormatAttribute(writer(), C_HYDROGENCOUNT,"%s", hct.str().c_str());
1450
xmlTextWriterWriteFormatAttribute(writer(), C_X2,"%s", x.str().c_str());
1451
xmlTextWriterWriteFormatAttribute(writer(), C_Y2,"%s", y.str().c_str());
1455
xmlTextWriterWriteFormatAttribute(writer(), C_X3orFRACT,"%s", x.str().c_str());
1456
xmlTextWriterWriteFormatAttribute(writer(), C_Y3orFRACT,"%s", y.str().c_str());
1457
xmlTextWriterWriteFormatAttribute(writer(), C_Z3orFRACT,"%s", z.str().c_str());
1463
xmlTextWriterStartElementNS(writer(), prefix, C_STRINGARRAY, NULL);
1464
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "atomID");
1465
xmlTextWriterWriteFormatString(writer(),"%s", id.str().c_str());
1466
xmlTextWriterEndElement(writer());
1468
xmlTextWriterStartElementNS(writer(), prefix, C_STRINGARRAY, NULL);
1469
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "elementType");
1470
xmlTextWriterWriteFormatString(writer(),"%s", eltyp.str().c_str());
1471
xmlTextWriterEndElement(writer());
1475
xmlTextWriterStartElementNS(writer(), prefix, C_INTEGERARRAY, NULL);
1476
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "formalCharge");
1477
xmlTextWriterWriteFormatString(writer(),"%s", chg.str().c_str());
1478
xmlTextWriterEndElement(writer());
1481
if(UseHydrogenCount)
1483
xmlTextWriterStartElementNS(writer(), prefix, C_INTEGERARRAY, NULL);
1484
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "hydrogenCount");
1485
xmlTextWriterWriteFormatString(writer(),"%s", hct.str().c_str());
1486
xmlTextWriterEndElement(writer());
1489
if(dim==2 || dim==3)
1491
xmlTextWriterStartElementNS(writer(), prefix, C_FLOATARRAY, NULL);
1492
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s%d", "x",dim);
1493
xmlTextWriterWriteFormatString(writer(),"%s", x.str().c_str());
1494
xmlTextWriterEndElement(writer());
1496
xmlTextWriterStartElementNS(writer(), prefix, C_FLOATARRAY, NULL);
1497
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s%d", "y",dim);
1498
xmlTextWriterWriteFormatString(writer(),"%s", y.str().c_str());
1499
xmlTextWriterEndElement(writer());
1503
xmlTextWriterStartElementNS(writer(), prefix, C_FLOATARRAY, NULL);
1504
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s%d", "z",dim);
1505
xmlTextWriterWriteFormatString(writer(),"%s", z.str().c_str());
1506
xmlTextWriterEndElement(writer());
1510
xmlTextWriterEndElement(writer());//atomArray
1514
if(mol.NumBonds()>0)
1516
xmlTextWriterStartElementNS(writer(), prefix, C_BONDARRAY, NULL);
1526
vector<OBBond*>::iterator ib;
1527
for (pbond = mol.BeginBond(ib);pbond;pbond = mol.NextBond(ib))
1529
int bo = pbond->GetBO();
1533
if(bo==5 || (WriteAromaticBonds && pbond->IsAromatic())) //aromatic
1538
ref1 = atomIds[pbond->GetBeginAtomIdx()];
1539
ref2 = atomIds[pbond->GetEndAtomIdx()];
1540
xmlTextWriterStartElementNS(writer(), prefix, C_BOND, NULL);
1541
// xmlTextWriterWriteFormatAttribute(writer(), C_ID,"b%d", pbond->GetIdx()); remove bond id
1544
xmlTextWriterWriteFormatAttribute(writer(), C_ATOMREFS2,"%s %s",
1545
ref1.c_str(), ref2.c_str());
1546
xmlTextWriterWriteFormatAttribute(writer(), C_ORDER,"%s", ord.str().c_str());
1548
if(bo==2 || pbond->IsWedge() || pbond->IsHash())
1549
WriteBondStereo(pbond, atomIds);
1554
xmlTextWriterStartElementNS(writer(), prefix, C_STRING, NULL);
1555
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "atomRef");
1556
xmlTextWriterWriteFormatString(writer(),"%s", ref1.c_str());
1557
xmlTextWriterEndElement(writer());
1559
xmlTextWriterStartElementNS(writer(), prefix, C_STRING, NULL);
1560
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "atomRef");
1561
xmlTextWriterWriteFormatString(writer(),"%s", ref2.c_str());
1562
xmlTextWriterEndElement(writer());
1564
xmlTextWriterStartElementNS(writer(), prefix, C_STRING, NULL);
1565
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "order");
1566
xmlTextWriterWriteFormatString(writer(),"%d", bo);
1567
xmlTextWriterEndElement(writer());
1569
xmlTextWriterEndElement(writer());//bond
1570
ord.str(""); //clear (For array form it accumulates.)
1574
if(bo==5 || (WriteAromaticBonds && pbond->IsAromatic())) //aromatic
1579
ref1 += ' ' + atomIds[pbond->GetBeginAtomIdx()];
1580
ref2 += ' ' + atomIds[pbond->GetEndAtomIdx()];
1587
xmlTextWriterWriteFormatAttribute(writer(), C_ATOMREF1, "%s", ref1.c_str());
1588
xmlTextWriterWriteFormatAttribute(writer(), C_ATOMREF2, "%s", ref2.c_str());
1589
xmlTextWriterWriteFormatAttribute(writer(), C_ORDER, "%s", ord.str().c_str());
1594
xmlTextWriterStartElementNS(writer(), prefix, C_STRINGARRAY, NULL);
1595
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "atomRef");
1596
xmlTextWriterWriteFormatString(writer(),"%s", ref1.c_str());
1597
xmlTextWriterEndElement(writer());
1599
xmlTextWriterStartElementNS(writer(), prefix, C_STRINGARRAY, NULL);
1600
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "atomRef");
1601
xmlTextWriterWriteFormatString(writer(),"%s", ref2.c_str());
1602
xmlTextWriterEndElement(writer());
1604
xmlTextWriterStartElementNS(writer(), prefix, C_STRINGARRAY, NULL);
1605
xmlTextWriterWriteFormatAttribute(writer(), C_BUILTIN,"%s", "order");
1606
xmlTextWriterWriteFormatString(writer(),"%s", ord.str().c_str());
1607
xmlTextWriterEndElement(writer());
1611
xmlTextWriterEndElement(writer());//bondArray
1613
//When array form, write bondStereo here
1616
for (pbond = mol.BeginBond(ib);pbond;pbond = mol.NextBond(ib))
1618
if(pbond->GetBO()==2 || pbond->IsWedge() || pbond->IsHash())
1619
WriteBondStereo(pbond, atomIds);
1624
bool propertyListWritten=false;
1625
if(mol.HasData(ThermoData))
1626
WriteThermo(mol, propertyListWritten);
1628
if(_pxmlConv->IsOption("p"))
1629
WriteProperties(mol, propertyListWritten);
1630
if(propertyListWritten)
1631
xmlTextWriterEndElement(writer());//propertList
1633
xmlTextWriterEndElement(writer());//molecule
1635
//Note that nothing will be written unless the next block is executed
1636
//IsLast() MUST return true for the last molecule.
1637
if(!_pxmlConv->IsOption("MolsNotStandalone") && _pxmlConv->IsLast())
1639
xmlTextWriterEndDocument(writer());
1645
///Constructs a unique id for each atom.
1646
// Builds the atom id strings used when writing CML and appends them to atomIDs:
// first a placeholder for index 0, then the ids produced while looping over
// atom indices 1..mol.NumAtoms(), so that atomIDs can be indexed by atom index.
void CMLFormat::MakeAtomIds(OBMol& mol, vector<string>& atomIDs)
1648
/* If there is no atom class data for the atom, the id is 'a' followed by the atom index.
1649
If there is atom class data then it is "aa" followed by the atom class.
1650
If a subsequent atom has the same atom class, its id is "ab" followed
1651
by the atom class, and so on. */
1654
map<int,char> acmap; //key=atom class; value=last letter used as second character in the id
1655
// Atom class data is fetched by its generic-data name; pac is tested for null before use below.
OBAtomClassData* pac = static_cast<OBAtomClassData*>(mol.GetData("Atom Class"));
1656
atomIDs.push_back("Error"); //atom index starts at 1. atomIDs[0] is not used
1657
for (int idx=1; idx<=mol.NumAtoms(); ++idx)
1661
if(pac && pac->HasClass(idx))
1663
int ac = pac->GetClass(idx);
1664
char ch2='a'; //default 2nd char
1665
// This class number has already been used by an earlier atom.
if(acmap.count(ac)>0)
1668
// Logged under the molecule title taken from _pmol.
// NOTE(review): presumably raised when the second letter runs out for this class -- confirm against the guarding condition.
obErrorLog.ThrowError(_pmol->GetTitle(),"CML: too many atoms with same atom class." , obError);
1674
atomIDs.push_back(ss.str());
1678
// Writes a <formula> element whose "concise" attribute is mol.GetSpacedFormula().
// mol is taken by value, so the hydrogens added below change only this local copy.
void CMLFormat::WriteFormula(OBMol mol)
1681
static const xmlChar C_FORMULA[] = "formula";
1682
static const xmlChar C_CONCISE[] = "concise";
1683
// A single-atom molecule gets hydrogens added before its formula is generated.
if(mol.NumAtoms()==1)
1684
mol.AddHydrogens(false,false);
1685
xmlTextWriterStartElementNS(writer(), prefix, C_FORMULA, NULL);
1686
xmlTextWriterWriteFormatAttribute(writer(), C_CONCISE,"%s", mol.GetSpacedFormula().c_str());
1687
xmlTextWriterEndElement(writer());//formula
1690
// Writes a <bondStereo> element for pbond.
// Wedge and hash bonds are tested first; otherwise the bond is handled as
// double-bond (cis/trans) stereo using the Up/Down marks on neighbouring bonds.
// atomIDs maps atom indices to the id strings written in the atomRefs4 attribute.
void CMLFormat::WriteBondStereo(OBBond* pbond, vector<string>& atomIDs)
1692
static const xmlChar C_ATOMREFS4[] = "atomRefs4";
1693
static const xmlChar C_BONDSTEREO[] = "bondStereo";
1696
if(pbond->IsWedge())
1698
else if(pbond->IsHash())
1702
//this line here because element may not be written with double bond
1703
xmlTextWriterStartElementNS(writer(), prefix, C_BONDSTEREO, NULL);
1706
//double bond stereo
1709
// Begin-atom side: look for a bond marked Up or Down and record the atom at
// its other end (idx1) and its direction (ud1: -1 for Down, +1 otherwise).
OBAtom* patomA = pbond->GetBeginAtom();
1710
FOR_BONDS_OF_ATOM(b1,patomA)
1712
if(b1->IsUp() || b1->IsDown() )
1714
idx1=(b1->GetNbrAtom(patomA))->GetIdx();
1715
ud1 = b1->IsDown() ? -1 : 1;
1716
// Conjugated double bonds have to be treated differently, see comments
1717
// in OBMol2Smi::GetCisTransBondSymbol(). Reverse symbol for other than first double bond.
1718
if((b1->GetNbrAtom(patomA))->HasDoubleBond())
1723
// End-atom side: same search, giving idx2 and ud2.
OBAtom* patomB = pbond->GetEndAtom();
1724
FOR_BONDS_OF_ATOM(b2,patomB)
1726
if(b2->IsUp() || b2->IsDown() )
1728
idx2=(b2->GetNbrAtom(patomB))->GetIdx();
1729
ud2 = b2->IsDown() ? -1 : 1;
1736
// atomRefs4 order: neighbour of A, A, B, neighbour of B.
xmlTextWriterStartElementNS(writer(), prefix, C_BONDSTEREO, NULL);
1737
xmlTextWriterWriteFormatAttribute(writer(), C_ATOMREFS4, "%s %s %s %s",
1738
// "a%d a%d a%d a%d", idx1, patomA->GetIdx(), patomB->GetIdx(), idx2);
1739
atomIDs[idx1].c_str(), atomIDs[patomA->GetIdx()].c_str(),
1740
atomIDs[patomB->GetIdx()].c_str(), atomIDs[idx2].c_str());
1741
// 'C' when both ends carry the same direction value, 'T' when they differ.
ch = (ud1==ud2) ? 'C' : 'T';
1744
// The element content is the single stereo character held in ch.
xmlTextWriterWriteFormatString(writer(),"%c", ch);
1745
xmlTextWriterEndElement(writer());//bondStereo
1748
// Writes a <crystal> element for the unit cell attached to mol: six <scalar>
// children (a, b, c in units:angstrom; alpha, beta, gamma in units:degree)
// followed by <symmetry> information about the space group.
void CMLFormat::WriteCrystal(OBMol& mol)
1750
static const xmlChar C_CRYSTAL[] = "crystal";
1751
static const xmlChar C_SCALAR[] = "scalar";
1752
// static const xmlChar C_Z[] = "z";
1753
static const xmlChar C_TITLE[] = "title";
1754
static const xmlChar C_UNITS[] = "units";
1755
static const xmlChar C_SYMMETRY[] = "symmetry";
1756
static const xmlChar C_SPACEGROUP[] = "spaceGroup";
1757
static const xmlChar C_TRANSFORM3[] = "transform3";
1759
// pUnitCell is assigned, not declared, here -- presumably a class member that
// other code (e.g. the fractional-coordinate conversion in WriteMolecule) reads later.
pUnitCell = (OBUnitCell*)mol.GetData(OBGenericDataType::UnitCell);
1761
xmlTextWriterStartElementNS(writer(), prefix, C_CRYSTAL, NULL);
1762
// xmlTextWriterWriteFormatAttribute(writer(), C_z,"%d", number of molecules per cell);
1764
// Cell edge a
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1765
xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s", "a");
1766
xmlTextWriterWriteFormatAttribute(writer(), C_UNITS,"%s", "units:angstrom");
1767
xmlTextWriterWriteFormatString(writer(),"%f", pUnitCell->GetA());
1768
xmlTextWriterEndElement(writer());//scalar
1770
// Cell edge b
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1771
xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s", "b");
1772
xmlTextWriterWriteFormatAttribute(writer(), C_UNITS,"%s", "units:angstrom");
1773
xmlTextWriterWriteFormatString(writer(),"%f", pUnitCell->GetB());
1774
xmlTextWriterEndElement(writer());//scalar
1776
// Cell edge c
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1777
xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s", "c");
1778
xmlTextWriterWriteFormatAttribute(writer(), C_UNITS,"%s", "units:angstrom");
1779
xmlTextWriterWriteFormatString(writer(),"%f", pUnitCell->GetC());
1780
xmlTextWriterEndElement(writer());//scalar
1782
// Cell angle alpha
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1783
xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s", "alpha");
1784
xmlTextWriterWriteFormatAttribute(writer(), C_UNITS,"%s", "units:degree");
1785
xmlTextWriterWriteFormatString(writer(),"%f", pUnitCell->GetAlpha());
1786
xmlTextWriterEndElement(writer());//scalar
1788
// Cell angle beta
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1789
xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s", "beta");
1790
xmlTextWriterWriteFormatAttribute(writer(), C_UNITS,"%s", "units:degree");
1791
xmlTextWriterWriteFormatString(writer(),"%f", pUnitCell->GetBeta());
1792
xmlTextWriterEndElement(writer());//scalar
1794
// Cell angle gamma
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1795
xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s", "gamma");
1796
xmlTextWriterWriteFormatAttribute(writer(), C_UNITS,"%s", "units:degree");
1797
xmlTextWriterWriteFormatString(writer(),"%f", pUnitCell->GetGamma());
1798
xmlTextWriterEndElement(writer());//scalar
1800
const SpaceGroup *group = pUnitCell->GetSpaceGroup();
1804
// <symmetry> built from the SpaceGroup object: the spaceGroup attribute is the
// Hall name, and each transform is written as a <transform3> child.
xmlTextWriterStartElementNS(writer(), prefix, C_SYMMETRY, NULL);
1805
xmlTextWriterWriteAttribute (writer(), C_SPACEGROUP, (const xmlChar*)group->GetHallName().c_str());
1806
transform3dIterator ti;
1807
const transform3d *t = group->BeginTransform(ti);
1811
// " 0 0 0 1" is appended to the values from DescribeAsValues()
// (presumably the bottom row completing a 4x4 matrix -- confirm).
s = t->DescribeAsValues() + " 0 0 0 1";
1812
xmlTextWriterWriteElement(writer(), C_TRANSFORM3, (const xmlChar*)s.c_str());
1813
t = group->NextTransform(ti);
1815
xmlTextWriterEndElement(writer());//symmetry
1819
//s = pUnitCell.GetSpaceGroupName();
1820
// <symmetry> built from the space group name string only (no transforms).
s = pUnitCell->GetSpaceGroupName();
1823
xmlTextWriterStartElementNS(writer(), prefix, C_SYMMETRY, NULL);
1824
xmlTextWriterWriteAttribute (writer(), C_SPACEGROUP, (const xmlChar*)s.c_str());
1825
xmlTextWriterEndElement(writer());//symmetry
1829
xmlTextWriterEndElement(writer());//crystal
1832
// Writes the pair-data items of mol as <property title="..."><scalar>value</scalar></property>,
// skipping the items whose attribute is "InChI" or "PartialCharges".
// propertyListWritten is in/out: if it is false when the first property is
// written, a <propertyList> element is opened and the flag is set to true.
// The list element is not closed here; WriteMolecule ends it when the flag is set.
// Vibration and rotation data, when present on mol, are written afterwards.
void CMLFormat::WriteProperties(OBMol& mol, bool& propertyListWritten)
1834
// static const xmlChar C_DICTREF[] = "dictRef";
1835
static const xmlChar C_PROPERTYLIST[] = "propertyList";
1836
static const xmlChar C_PROPERTY[] = "property";
1837
static const xmlChar C_SCALAR[] = "scalar";
1838
static const xmlChar C_TITLE[] = "title";
1840
vector<OBGenericData*>::iterator k;
1841
vector<OBGenericData*> vdata = mol.GetData();
1842
for (k = vdata.begin();k != vdata.end();k++)
1844
if ((*k)->GetDataType() == OBGenericDataType::PairData
1845
&& (*k)->GetAttribute()!="InChI" //InChI is output in <identifier>
1846
&& (*k)->GetAttribute()!="PartialCharges")//annotation not needed since partial charges are not output in this format
1848
// Open the enclosing <propertyList> once, on the first property written.
if(!propertyListWritten)
1850
xmlTextWriterStartElementNS(writer(), prefix, C_PROPERTYLIST, NULL);
1851
propertyListWritten=true;
1853
xmlTextWriterStartElementNS(writer(), prefix, C_PROPERTY, NULL);
1854
//Title is now on <property>
1855
xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s",(*k)->GetAttribute().c_str());
1856
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1857
//Title used to be on <scalar>...
1858
//xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s",(*k)->GetAttribute().c_str());
1859
// The cast relies on the PairData type check in the condition above.
xmlTextWriterWriteFormatString(writer(),"%s", (static_cast<OBPairData*>(*k))->GetValue().c_str());
1860
xmlTextWriterEndElement(writer());//scalar
1861
xmlTextWriterEndElement(writer());//property
1864
// NOTE(review): the two calls below do not consult propertyListWritten in the
// lines shown here -- confirm that a <propertyList> is open when they write their <property> elements.
if(mol.HasData(OBGenericDataType::VibrationData))
1865
WriteVibrationData(mol);
1866
if(mol.HasData(OBGenericDataType::RotationData))
1867
WriteRotationData(mol);
1871
// Writes the NASA thermo data attached to mol as a single
// <property dictRef="Thermo_OldNasa"> containing four <scalar> children
// (NasaLowT, NasaHighT, NasaMidT, Phase) and one <array> of 14 coefficients.
// propertyListWritten is in/out: a <propertyList> is opened here if it is not
// already open, and the flag is then set to true.
void CMLFormat::WriteThermo(OBMol& mol, bool& propertyListWritten)
1873
static const xmlChar C_PROPERTYLIST[] = "propertyList";
1874
static const xmlChar C_PROPERTY[] = "property";
1875
static const xmlChar C_SCALAR[] = "scalar";
1876
static const xmlChar C_ARRAY[] = "array";
1877
static const xmlChar C_DICTREF[] = "dictRef";
1878
static const xmlChar C_SIZE[] = "size";
1880
// No null check is visible here; the call site in WriteMolecule tests mol.HasData(ThermoData) first.
OBNasaThermoData* pThermoData = static_cast<OBNasaThermoData*>(mol.GetData(ThermoData));
1882
if(!propertyListWritten)
1884
xmlTextWriterStartElementNS(writer(), prefix, C_PROPERTYLIST, NULL);
1885
propertyListWritten=true;
1888
xmlTextWriterStartElementNS(writer(), prefix, C_PROPERTY, NULL);
1889
xmlTextWriterWriteFormatAttribute(writer(), C_DICTREF,"%s","Thermo_OldNasa");
1891
// Temperatures are written with one decimal place.
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1892
xmlTextWriterWriteFormatAttribute(writer(), C_DICTREF,"%s","NasaLowT");
1893
xmlTextWriterWriteFormatString(writer(),"%.1f", pThermoData->GetLoT());
1894
xmlTextWriterEndElement(writer());//scalar
1896
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1897
xmlTextWriterWriteFormatAttribute(writer(), C_DICTREF,"%s","NasaHighT");
1898
xmlTextWriterWriteFormatString(writer(),"%.1f", pThermoData->GetHiT());
1899
xmlTextWriterEndElement(writer());//scalar
1901
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1902
xmlTextWriterWriteFormatAttribute(writer(), C_DICTREF,"%s","NasaMidT");
1903
xmlTextWriterWriteFormatString(writer(),"%.1f", pThermoData->GetMidT());
1904
xmlTextWriterEndElement(writer());//scalar
1906
// Phase is written as a single character.
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
1907
xmlTextWriterWriteFormatAttribute(writer(), C_DICTREF,"%s","Phase");
1908
xmlTextWriterWriteFormatString(writer(),"%c", pThermoData->GetPhase());
1909
xmlTextWriterEndElement(writer());//scalar
1911
// The 14 coefficients are written in exponent notation, each preceded by a space.
xmlTextWriterStartElementNS(writer(), prefix, C_ARRAY, NULL);
1912
xmlTextWriterWriteFormatAttribute(writer(), C_DICTREF,"%s","NasaCoeffs");
1913
xmlTextWriterWriteFormatAttribute(writer(), C_SIZE,"%d",14);
1914
for(int i=0;i<14;++i)
1915
xmlTextWriterWriteFormatString(writer()," %e", pThermoData->GetCoeff(i));
1916
xmlTextWriterEndElement(writer());//array
1918
xmlTextWriterEndElement(writer());//property
1921
///Returns the molecule title, or "Mol #" plus the molecule number if there is no title, together with the input file name
1922
// Builds a description of the current molecule (_pmol) in molID: its title,
// or "Mol #" followed by the output index plus one when the title is empty,
// then " (in <input file name>)".
string CMLFormat::GetMolID()
1925
if(strlen(_pmol->GetTitle())==0)
1926
molID << "Mol #" << _pxmlConv->GetOutputIndex()+1;
1928
molID << _pmol->GetTitle();
1930
string fn(_pxmlConv->GetInFilename());
1931
//Get file name: remove path
1932
// Position of the last path separator in the input file name, if there is one.
string::size_type pos = fn.rfind(DLHandler::getSeparator());
1933
if(pos!=string::npos)
1935
molID << " (in " << fn << ')';
1939
// Writes the molecule's InChI, if it has one, as an <identifier> element.
// Returns false when nothing was written.
bool CMLFormat::WriteInChI(OBMol& mol)
1941
//If OBPair data has an entry with attribute "InChI" it is not
1942
//output in the property list but as a separate element in the form:
1943
//<identifier convention="iupac:inchi" value="InChI=1/CH4/h1H4"/>
1944
static const xmlChar C_IDENTIFIER[] = "identifier";
1945
static const xmlChar C_CONVENTION[] = "convention";
1946
static const xmlChar C_VALUE[] = "value";
1947
// dynamic_cast yields a null pointer if the "InChI" data is absent or is not an OBPairData.
OBPairData* pData = dynamic_cast<OBPairData*>(mol.GetData("InChI"));
1950
xmlTextWriterStartElementNS(writer(), prefix, C_IDENTIFIER, NULL);
1951
xmlTextWriterWriteFormatAttribute(writer(), C_CONVENTION,"%s","iupac:inchi");
1952
xmlTextWriterWriteFormatAttribute(writer(), C_VALUE,"%s", pData->GetValue().c_str());
1953
xmlTextWriterEndElement(writer());//identifier
1956
return false; //not written
1959
// Writes the vibration data attached to mol as
// <property title="Vibrational Frequencies" dictRef="me:vibFreqs">
// containing one <array units="cm-1"> of space-separated frequencies.
bool CMLFormat::WriteVibrationData(OBMol& mol)
1961
static const xmlChar C_PROPERTY[] = "property";
1962
static const xmlChar C_SCALAR[] = "scalar";
1963
static const xmlChar C_ARRAY[] = "array";
1964
static const xmlChar C_DICTREF[] = "dictRef";
1965
static const xmlChar C_UNITS[] = "units";
1966
static const xmlChar C_TITLE[] = "title";
1968
// No null check is visible here; WriteProperties calls this only after HasData(VibrationData).
OBVibrationData* vd = (OBVibrationData*)mol.GetData(OBGenericDataType::VibrationData);
1970
xmlTextWriterStartElementNS(writer(), prefix, C_PROPERTY, NULL);
1971
xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s","Vibrational Frequencies");
1972
xmlTextWriterWriteFormatAttribute(writer(), C_DICTREF,"%s","me:vibFreqs");
1974
xmlTextWriterStartElementNS(writer(), prefix, C_ARRAY, NULL);
1975
xmlTextWriterWriteFormatAttribute(writer(), C_UNITS,"%s","cm-1");
1976
// "%.lf" has an empty precision, which printf treats as zero: each frequency
// is written without decimals, followed by a space.
// NOTE(review): confirm that dropping the fractional part is intended.
for(int i=0; i<vd->GetNumberOfFrequencies(); ++i)
1977
xmlTextWriterWriteFormatString(writer(),"%.lf ", vd->GetFrequencies()[i]);
1978
xmlTextWriterEndElement(writer());//array
1979
xmlTextWriterEndElement(writer());//property
1983
// Writes the rotation data attached to mol as two <property> elements:
//   "Rotational Constants" (dictRef me:rotConsts): an <array units="cm-1"> of the non-zero constants;
//   "Symmetry Number" (dictRef me:symmetryNumber): a <scalar> holding the symmetry number.
bool CMLFormat::WriteRotationData(OBMol& mol)
1985
static const xmlChar C_PROPERTY[] = "property";
1986
static const xmlChar C_SCALAR[] = "scalar";
1987
static const xmlChar C_ARRAY[] = "array";
1988
static const xmlChar C_DICTREF[] = "dictRef";
1989
static const xmlChar C_UNITS[] = "units";
1990
static const xmlChar C_TITLE[] = "title";
1992
// No null check is visible here; WriteProperties calls this only after HasData(RotationData).
OBRotationData* rd = (OBRotationData*)mol.GetData(OBGenericDataType::RotationData);
1994
xmlTextWriterStartElementNS(writer(), prefix, C_PROPERTY, NULL);
1995
xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s","Rotational Constants");
1996
xmlTextWriterWriteFormatAttribute(writer(), C_DICTREF,"%s","me:rotConsts");
1998
xmlTextWriterStartElementNS(writer(), prefix, C_ARRAY, NULL);
1999
xmlTextWriterWriteFormatAttribute(writer(), C_UNITS,"%s","cm-1");
2000
// Each stored constant is divided by this factor before being written as cm-1,
// so the stored values are presumably in GHz -- confirm.
// NOTE(review): 30.0 is a rounded factor (1 cm-1 is about 29.979 GHz).
const double WAVENUM_TO_GHZ=30.0;
2001
// Only the first three constants are considered, and zero-valued ones are skipped.
for(int i=0; i<3; ++i)
2002
if(rd->GetRotConsts()[i]!=0.0)
2003
xmlTextWriterWriteFormatString(writer(),"%.1f ", rd->GetRotConsts()[i]/WAVENUM_TO_GHZ);
2004
xmlTextWriterEndElement(writer());//array
2005
xmlTextWriterEndElement(writer());//property
2006
xmlTextWriterStartElementNS(writer(), prefix, C_PROPERTY, NULL);
2007
xmlTextWriterWriteFormatAttribute(writer(), C_TITLE,"%s","Symmetry Number");
2008
xmlTextWriterWriteFormatAttribute(writer(), C_DICTREF,"%s","me:symmetryNumber");
2010
// The symmetry number is written as an integer followed by a space.
xmlTextWriterStartElementNS(writer(), prefix, C_SCALAR, NULL);
2011
xmlTextWriterWriteFormatString(writer(),"%d ", rd->GetSymmetryNumber());
2012
xmlTextWriterEndElement(writer());//scalar
2013
xmlTextWriterEndElement(writer());//property
2017
bool CMLFormat::WriteChemObject(OBConversion* pConv)
2019
int OIndex = pConv->GetOutputIndex();
2020
OBBase* pOb = pConv->GetChemObject();
2021
if(dynamic_cast<OBMol*> (pOb))
2023
//With an OBMol object, do the same as if this function wasn't defined,
2024
//i.e.access the functionality in OBMoleculeFormat
2026
//restore output index which is (unhelpfully) incremented by GetChemObject
2027
pConv->SetOutputIndex(OIndex);
2028
return XMLMoleculeFormat::WriteChemObject(pConv);
2031
//With OBReaction object, handle directly in CMLFormat::WriteMolecule
2032
bool ret = WriteMolecule(pOb,pConv);