/* $Revision: 8301 $ $Author: egonw $ $Date: 2007-05-05 14:00:36 +0200 (Sat, 05 May 2007) $
 *
 * Copyright (C) 1997-2007 Egon Willighagen <egonw@users.sf.net>
 *
 * Contact: cdk-devel@lists.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 * All we ask is that proper credit is given for our work, which includes
 * - but is not limited to - adding the above copyright notice to the beginning
 * of your source code files, and to any copyright notice that you may distribute
 * with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */
package org.openscience.cdk.io.cml;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import javax.vecmath.Point2d;
import javax.vecmath.Point3d;
import javax.vecmath.Vector3d;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.config.IsotopeFactory;
import org.openscience.cdk.dict.DictRef;
import org.openscience.cdk.geometry.CrystalGeometryTools;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IChemFile;
import org.openscience.cdk.interfaces.IChemModel;
import org.openscience.cdk.interfaces.IChemSequence;
import org.openscience.cdk.interfaces.ICrystal;
import org.openscience.cdk.interfaces.IMolecule;
import org.openscience.cdk.interfaces.IMoleculeSet;
import org.openscience.cdk.interfaces.IMonomer;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.interfaces.IReaction;
import org.openscience.cdk.interfaces.IReactionSet;
import org.openscience.cdk.interfaces.IStrand;
import org.openscience.cdk.tools.LoggingTool;
import org.xml.sax.Attributes;

/**
 * Core CML 1.x and 2.0 elements are parsed by this class.
 *
 * <p>Please file a bug report if this parser fails to parse
 * a certain element or attribute value in a valid CML document.
 *
 * @author Egon Willighagen <egonw@sci.kun.nl>
 */
public class CMLCoreModule implements ICMLModule {
72
protected org.openscience.cdk.tools.LoggingTool logger;
73
protected final String SYSTEMID = "CML-1999-05-15";
74
// protected IChemicalDocumentObject cdo;
76
// data model to store things into
77
protected IChemFile currentChemFile;
79
protected IAtomContainer currentMolecule;
80
protected IMoleculeSet currentMoleculeSet;
81
protected IChemModel currentChemModel;
82
protected IChemSequence currentChemSequence;
83
protected IReactionSet currentReactionSet;
84
protected IReaction currentReaction;
85
protected IAtom currentAtom;
86
protected IBond currentBond;
87
protected IStrand currentStrand;
88
protected IMonomer currentMonomer;
89
protected Map atomEnumeration;
92
protected int atomCounter;
94
protected List eltitles;
96
protected List formalCharges;
97
protected List partialCharges;
98
protected List isotope;
104
protected List xfract;
105
protected List yfract;
106
protected List zfract;
107
protected List hCounts;
108
protected List atomParities;
109
protected List atomDictRefs;
110
protected List spinMultiplicities;
111
protected List occupancies;
113
protected int bondCounter;
114
protected List bondid;
115
protected List bondARef1;
116
protected List bondARef2;
117
protected List order;
118
protected List bondStereo;
119
protected List bondDictRefs;
120
protected List bondElid;
121
protected List bondAromaticity;
122
protected boolean stereoGiven;
123
protected String inchi;
124
protected int curRef;
125
protected int CurrentElement;
126
protected String BUILTIN;
127
protected String DICTREF;
128
protected String elementTitle;
129
protected String currentChars;
131
protected double[] unitcellparams;
132
protected int crystalScalar;
134
// private Vector3d aAxis;
135
// private Vector3d bAxis;
136
// private Vector3d cAxis;
137
boolean cartesianAxesSet = false;
/**
 * Creates a core module that stores what it parses into the given chem file.
 *
 * @param chemFile the chem file whose builder creates the data-model objects
 *                 and to which the parsed sequence is added at the end of the document
 */
public CMLCoreModule(IChemFile chemFile) {
    logger = new LoggingTool(this);
    this.currentChemFile = chemFile;
}

/**
 * Creates a core module that continues from the state of another module.
 *
 * @param conv the module whose parsing state is taken over, see {@link #inherit(ICMLModule)}
 */
public CMLCoreModule(ICMLModule conv) {
    logger = new LoggingTool(this);
    // Without this call the argument would be ignored and the new module
    // would start without a chem file or any helper lists.
    inherit(conv);
}

149
public void inherit(ICMLModule convention) {
150
if (convention instanceof CMLCoreModule) {
151
CMLCoreModule conv = (CMLCoreModule)convention;
153
// copy the data model
154
this.currentChemFile = conv.currentChemFile;
155
this.currentMolecule = conv.currentMolecule;
156
this.currentMoleculeSet = conv.currentMoleculeSet;
157
this.currentChemModel = conv.currentChemModel;
158
this.currentChemSequence = conv.currentChemSequence;
159
this.currentReactionSet = conv.currentReactionSet;
160
this.currentReaction = conv.currentReaction;
161
this.currentAtom = conv.currentAtom;
162
this.currentStrand = conv.currentStrand;
163
this.currentMonomer = conv.currentMonomer;
164
this.atomEnumeration = conv.atomEnumeration;
166
// copy the intermediate fields
167
this.logger = conv.logger;
168
this.BUILTIN = conv.BUILTIN;
169
this.atomCounter = conv.atomCounter;
170
this.elsym = conv.elsym;
171
this.eltitles = conv.eltitles;
172
this.elid = conv.elid;
173
this.formalCharges = conv.formalCharges;
174
this.partialCharges = conv.partialCharges;
175
this.isotope = conv.isotope;
181
this.xfract = conv.xfract;
182
this.yfract = conv.yfract;
183
this.zfract = conv.zfract;
184
this.hCounts = conv.hCounts;
185
this.atomParities = conv.atomParities;
186
this.atomDictRefs = conv.atomDictRefs;
187
this.spinMultiplicities = conv.spinMultiplicities;
188
this.occupancies = conv.occupancies;
189
this.bondCounter = conv.bondCounter;
190
this.bondid = conv.bondid;
191
this.bondARef1 = conv.bondARef1;
192
this.bondARef2 = conv.bondARef2;
193
this.order = conv.order;
194
this.bondStereo = conv.bondStereo;
195
this.bondDictRefs = conv.bondDictRefs;
196
this.bondAromaticity = conv.bondAromaticity;
197
this.curRef = conv.curRef;
198
this.unitcellparams = conv.unitcellparams;
199
this.inchi = conv.inchi;
201
logger.warn("Cannot inherit information from module: ", convention.getClass().getName());
205
public IChemFile returnChemFile() {
206
return currentChemFile;
210
* Clean all data about parsed data.
212
protected void newMolecule() {
220
* Clean all data about the molecule itself.
222
protected void newMoleculeData() {
227
* Clean all data about read atoms.
229
protected void newAtomData() {
231
elsym = new ArrayList();
232
elid = new ArrayList();
233
eltitles = new ArrayList();
234
formalCharges = new ArrayList();
235
partialCharges = new ArrayList();
236
isotope = new ArrayList();
237
x3 = new ArrayList();
238
y3 = new ArrayList();
239
z3 = new ArrayList();
240
x2 = new ArrayList();
241
y2 = new ArrayList();
242
xfract = new ArrayList();
243
yfract = new ArrayList();
244
zfract = new ArrayList();
245
hCounts = new ArrayList();
246
atomParities = new ArrayList();
247
atomDictRefs = new ArrayList();
248
spinMultiplicities = new ArrayList();
249
occupancies = new ArrayList();
253
* Clean all data about read bonds.
255
protected void newBondData() {
257
bondid = new ArrayList();
258
bondARef1 = new ArrayList();
259
bondARef2 = new ArrayList();
260
order = new ArrayList();
261
bondStereo = new ArrayList();
262
bondDictRefs = new ArrayList();
263
bondElid = new ArrayList();
264
bondAromaticity = new ArrayList();
268
* Clean all data about read bonds.
270
protected void newCrystalData() {
271
unitcellparams = new double[6];
272
cartesianAxesSet = false;
274
// aAxis = new Vector3d();
275
// bAxis = new Vector3d();
276
// cAxis = new Vector3d();
279
public void startDocument() {
280
logger.info("Start XML Doc");
281
// cdo.startDocument();
282
currentChemSequence = currentChemFile.getBuilder().newChemSequence();
283
currentChemModel = currentChemFile.getBuilder().newChemModel();
284
currentMoleculeSet = currentChemFile.getBuilder().newMoleculeSet();
285
currentMolecule = currentChemFile.getBuilder().newMolecule();
286
atomEnumeration = new HashMap();
293
public void endDocument() {
294
// cdo.endDocument();
295
if (currentReactionSet != null && currentReactionSet.getReactionCount() == 0
296
&& currentReaction != null) {
297
logger.debug("Adding reaction to ReactionSet");
298
currentReactionSet.addReaction(currentReaction);
300
if (currentReactionSet != null && currentChemModel.getReactionSet() == null) {
301
logger.debug("Adding SOR to ChemModel");
302
currentChemModel.setReactionSet(currentReactionSet);
304
if (currentMoleculeSet != null && currentMoleculeSet.getMoleculeCount() != 0) {
305
logger.debug("Adding reaction to MoleculeSet");
306
currentChemModel.setMoleculeSet(currentMoleculeSet);
308
if (currentChemSequence.getChemModelCount() == 0) {
309
logger.debug("Adding ChemModel to ChemSequence");
310
currentChemSequence.addChemModel(currentChemModel);
312
if (currentChemFile.getChemSequenceCount() == 0) {
313
// assume there is one non-animation ChemSequence
314
// addChemSequence(currentChemSequence);
315
currentChemFile.addChemSequence(currentChemSequence);
318
logger.info("End XML Doc");
321
public void startElement(CMLStack xpath, String uri, String local, String raw,
324
logger.debug("StartElement");
330
for (int i=0; i<atts.getLength(); i++) {
331
String qname = atts.getQName(i);
332
if (qname.equals("builtin")) {
333
BUILTIN = atts.getValue(i);
334
logger.debug(name, "->BUILTIN found: ", atts.getValue(i));
335
} else if (qname.equals("dictRef")) {
336
DICTREF = atts.getValue(i);
337
logger.debug(name, "->DICTREF found: ", atts.getValue(i));
338
} else if (qname.equals("title")) {
339
elementTitle = atts.getValue(i);
340
logger.debug(name, "->TITLE found: ", atts.getValue(i));
342
logger.debug("Qname: ", qname);
346
if ("atom".equals(name)) {
348
for (int i = 0; i < atts.getLength(); i++) {
350
String att = atts.getQName(i);
351
String value = atts.getValue(i);
353
if (att.equals("id")) { // this is supported in CML 1.x
355
} // this is supported in CML 2.0
356
else if (att.equals("elementType")) {
358
} // this is supported in CML 2.0
359
else if (att.equals("title")) {
361
} // this is supported in CML 2.0
362
else if (att.equals("x2")) {
364
} // this is supported in CML 2.0
365
else if (att.equals("xy2")) {
366
StringTokenizer tokenizer = new StringTokenizer(value);
367
x2.add(tokenizer.nextToken());
368
y2.add(tokenizer.nextToken());
369
} // this is supported in CML 2.0
370
else if (att.equals("xyzFract")) {
371
StringTokenizer tokenizer = new StringTokenizer(value);
372
xfract.add(tokenizer.nextToken());
373
yfract.add(tokenizer.nextToken());
374
zfract.add(tokenizer.nextToken());
375
} // this is supported in CML 2.0
376
else if (att.equals("xyz3")) {
377
StringTokenizer tokenizer = new StringTokenizer(value);
378
x3.add(tokenizer.nextToken());
379
y3.add(tokenizer.nextToken());
380
z3.add(tokenizer.nextToken());
381
} // this is supported in CML 2.0
382
else if (att.equals("y2")) {
384
} // this is supported in CML 2.0
385
else if (att.equals("x3")) {
387
} // this is supported in CML 2.0
388
else if (att.equals("y3")) {
390
} // this is supported in CML 2.0
391
else if (att.equals("z3")) {
393
} // this is supported in CML 2.0
394
else if (att.equals("xFract")) {
396
} // this is supported in CML 2.0
397
else if (att.equals("yFract")) {
399
} // this is supported in CML 2.0
400
else if (att.equals("zFract")) {
402
} // this is supported in CML 2.0
403
else if (att.equals("formalCharge")) {
404
formalCharges.add(value);
405
} // this is supported in CML 2.0
406
else if (att.equals("hydrogenCount")) {
409
else if (att.equals("isotope")) {
412
else if (att.equals("dictRef")) {
413
logger.debug("ocupaccy: "+value);
414
atomDictRefs.add(value);
416
else if (att.equals("spinMultiplicity")) {
417
spinMultiplicities.add(value);
419
else if (att.equals("occupancy")) {
420
occupancies.add(value);
424
logger.warn("Unparsed attribute: " + att);
427
} else if ("atomArray".equals(name) &&
428
!xpath.endsWith("formula", "atomArray")) {
429
boolean atomsCounted = false;
430
for (int i = 0; i < atts.getLength(); i++) {
431
String att = atts.getQName(i);
433
if (att.equals("atomID")) {
434
count = addArrayElementsTo(elid, atts.getValue(i));
435
} else if (att.equals("elementType")) {
436
count = addArrayElementsTo(elsym, atts.getValue(i));
437
} else if (att.equals("x2")) {
438
count = addArrayElementsTo(x2, atts.getValue(i));
439
} else if (att.equals("y2")) {
440
count = addArrayElementsTo(y2, atts.getValue(i));
441
} else if (att.equals("x3")) {
442
count = addArrayElementsTo(x3, atts.getValue(i));
443
} else if (att.equals("y3")) {
444
count = addArrayElementsTo(y3, atts.getValue(i));
445
} else if (att.equals("z3")) {
446
count = addArrayElementsTo(z3, atts.getValue(i));
447
} else if (att.equals("xFract")) {
448
count = addArrayElementsTo(xfract, atts.getValue(i));
449
} else if (att.equals("yFract")) {
450
count = addArrayElementsTo(yfract, atts.getValue(i));
451
} else if (att.equals("zFract")) {
452
count = addArrayElementsTo(zfract, atts.getValue(i));
454
logger.warn("Unparsed attribute: " + att);
457
atomCounter += count;
461
} else if ("bond".equals(name)) {
463
for (int i = 0; i < atts.getLength(); i++) {
464
String att = atts.getQName(i);
465
logger.debug("B2 ", att, "=", atts.getValue(i));
467
if (att.equals("id")) {
468
bondid.add(atts.getValue(i));
469
logger.debug("B3 ", bondid);
470
} else if (att.equals("atomRefs") || // this is CML 1.x support
471
att.equals("atomRefs2")) { // this is CML 2.0 support
473
// expect exactly two references
475
StringTokenizer st = new StringTokenizer(
478
bondARef1.add((String)st.nextElement());
479
bondARef2.add((String)st.nextElement());
480
} catch (Exception e) {
481
logger.error("Error in CML file: ", e.getMessage());
484
} else if (att.equals("order")) { // this is CML 2.0 support
485
order.add(atts.getValue(i).trim());
486
} else if (att.equals("dictRef")) {
487
bondDictRefs.add(atts.getValue(i).trim());
493
} else if ("bondArray".equals(name)) {
494
boolean bondsCounted = false;
495
for (int i = 0; i < atts.getLength(); i++) {
496
String att = atts.getQName(i);
498
if (att.equals("bondID")) {
499
count = addArrayElementsTo(bondid, atts.getValue(i));
500
} else if (att.equals("atomRefs1")) {
501
count = addArrayElementsTo(bondARef1, atts.getValue(i));
502
} else if (att.equals("atomRefs2")) {
503
count = addArrayElementsTo(bondARef2, atts.getValue(i));
504
} else if (att.equals("atomRef1")) {
505
count = addArrayElementsTo(bondARef1, atts.getValue(i));
506
} else if (att.equals("atomRef2")) {
507
count = addArrayElementsTo(bondARef2, atts.getValue(i));
508
} else if (att.equals("order")) {
509
count = addArrayElementsTo(order, atts.getValue(i));
511
logger.warn("Unparsed attribute: " + att);
514
bondCounter += count;
519
} else if ("bondStereo".equals(name)) {
520
for (int i = 0; i < atts.getLength(); i++) {
521
if (atts.getQName(i).equals("dictRef")) {
522
if (atts.getValue(i).startsWith("cml:"))
523
bondStereo.add(atts.getValue(i).substring(4));
527
} else if ("bondType".equals(name)) {
528
for (int i = 0; i < atts.getLength(); i++) {
529
if (atts.getQName(i).equals("dictRef")) {
530
if (atts.getValue(i).equals("cdk:aromaticBond"))
531
bondAromaticity.add(Boolean.TRUE);
534
} else if ("molecule".equals(name)) {
537
// cdo.startObject("Molecule");
538
if (currentChemModel == null) currentChemModel = currentChemFile.getBuilder().newChemModel();
539
if (currentMoleculeSet == null) currentMoleculeSet = currentChemFile.getBuilder().newMoleculeSet();
540
currentMolecule = currentChemFile.getBuilder().newMolecule();
541
for (int i = 0; i < atts.getLength(); i++) {
542
if (atts.getQName(i).equals("id")) {
543
// cdo.setObjectProperty("Molecule", "id", atts.getValue(i));
544
currentMolecule.setID(atts.getValue(i));
545
} else if (atts.getQName(i).equals("dictRef")) {
546
// cdo.setObjectProperty("Molecule", "dictRef", atts.getValue(i));
547
currentMolecule.setProperty(new DictRef(DICTREF, atts.getValue(i)), atts.getValue(i));
550
} else if ("crystal".equals(name)) {
552
// cdo.startObject("Crystal");
553
currentMolecule = currentChemFile.getBuilder().newCrystal(currentMolecule);
554
for (int i = 0; i < atts.getLength(); i++) {
555
String att = atts.getQName(i);
556
if (att.equals("z")) {
557
// cdo.setObjectProperty("Crystal", "z", atts.getValue(i));
558
((ICrystal)currentMolecule).setZ(Integer.parseInt(atts.getValue(i)));
561
} else if ("symmetry".equals(name)) {
562
for (int i = 0; i < atts.getLength(); i++) {
563
String att = atts.getQName(i);
564
if (att.equals("spaceGroup")) {
565
// cdo.setObjectProperty("Crystal", "spacegroup", atts.getValue(i));
566
((ICrystal)currentMolecule).setSpaceGroup(atts.getValue(i));
569
} else if ("identifier".equals(name)) {
570
if (atts.getValue("convention") != null &&
571
atts.getValue("convention").equals("iupac:inchi") &&
572
atts.getValue("value") != null) {
573
// cdo.setObjectProperty("Molecule", "inchi", atts.getValue("value"));
574
currentMolecule.setProperty(CDKConstants.INCHI, atts.getValue("value"));
576
} else if ("scalar".equals(name)) {
577
if (xpath.endsWith("crystal", "scalar"))
579
} else if ("label".equals(name)) {
580
if (xpath.endsWith("atomType", "label")) {
581
// cdo.setObjectProperty("Atom", "atomTypeLabel", atts.getValue("value"));
582
currentAtom.setAtomTypeName(atts.getValue("value"));
584
} else if ("list".equals(name)) {
585
// cdo.startObject("MoleculeSet");
586
if (DICTREF.equals("cdk:model")) {
587
currentChemModel = currentChemFile.getBuilder().newChemModel();
588
} else if (DICTREF.equals("cdk:moleculeSet")) {
589
currentMoleculeSet = currentChemFile.getBuilder().newMoleculeSet();
590
currentMolecule = currentChemFile.getBuilder().newMolecule();
593
currentMoleculeSet = currentChemFile.getBuilder().newMoleculeSet();
594
currentMolecule = currentChemFile.getBuilder().newMolecule();
599
public void endElement(CMLStack xpath, String uri, String name, String raw) {
600
logger.debug("EndElement: ", name);
602
String cData = currentChars;
603
if ("bond".equals(name)) {
606
if (bondStereo.size() > bondDictRefs.size())
607
bondDictRefs.add(null);
608
if (bondAromaticity.size() > bondDictRefs.size())
609
bondAromaticity.add(null);
610
} else if ("atom".equals(name)) {
611
if (atomCounter > eltitles.size()) {
614
if (atomCounter > hCounts.size()) {
615
/* while strictly undefined, assume zero
616
implicit hydrogens when no number is given */
619
if (atomCounter > atomDictRefs.size()) {
620
atomDictRefs.add(null);
622
if (atomCounter > isotope.size()) {
625
if (atomCounter > spinMultiplicities.size()) {
626
spinMultiplicities.add(null);
628
if (atomCounter > occupancies.size()) {
629
occupancies.add(null);
631
if (atomCounter > formalCharges.size()) {
632
/* while strictly undefined, assume zero
633
implicit hydrogens when no number is given */
634
formalCharges.add("0");
636
/* It may happen that not all atoms have
637
associated 2D or 3D coordinates. accept that */
638
if (atomCounter > x2.size() && x2.size() != 0) {
639
/* apparently, the previous atoms had atomic
640
coordinates, add 'null' for this atom */
644
if (atomCounter > x3.size() && x3.size() != 0) {
645
/* apparently, the previous atoms had atomic
646
coordinates, add 'null' for this atom */
652
if (atomCounter > xfract.size() && xfract.size() != 0) {
653
/* apparently, the previous atoms had atomic
654
coordinates, add 'null' for this atom */
659
} else if ("molecule".equals(name)) {
661
// cdo.endObject("Molecule");
662
if (currentMolecule instanceof IMolecule) {
663
logger.debug("Adding molecule to set");
664
currentMoleculeSet.addMolecule((IMolecule)currentMolecule);
665
logger.debug("#mols in set: " + currentMoleculeSet.getMoleculeCount());
666
} else if (currentMolecule instanceof ICrystal) {
667
logger.debug("Adding crystal to chemModel");
668
currentChemModel.setCrystal((ICrystal)currentMolecule);
669
currentChemSequence.addChemModel(currentChemModel);
671
} else if ("crystal".equals(name)) {
672
if (crystalScalar > 0) {
673
// convert unit cell parameters to cartesians
674
Vector3d[] axes = CrystalGeometryTools.notionalToCartesian(
675
unitcellparams[0], unitcellparams[1], unitcellparams[2],
676
unitcellparams[3], unitcellparams[4], unitcellparams[5]
678
cartesianAxesSet = true;
679
// cdo.startObject("a-axis");
680
// cdo.setObjectProperty("a-axis", "x", new Double(aAxis.x).toString());
681
// cdo.setObjectProperty("a-axis", "y", new Double(aAxis.y).toString());
682
// cdo.setObjectProperty("a-axis", "z", new Double(aAxis.z).toString());
683
// cdo.endObject("a-axis");
684
// cdo.startObject("b-axis");
685
// cdo.setObjectProperty("b-axis", "x", new Double(bAxis.x).toString());
686
// cdo.setObjectProperty("b-axis", "y", new Double(bAxis.y).toString());
687
// cdo.setObjectProperty("b-axis", "z", new Double(bAxis.z).toString());
688
// cdo.endObject("b-axis");
689
// cdo.startObject("c-axis");
690
// cdo.setObjectProperty("c-axis", "x", new Double(cAxis.x).toString());
691
// cdo.setObjectProperty("c-axis", "y", new Double(cAxis.y).toString());
692
// cdo.setObjectProperty("c-axis", "z", new Double(cAxis.z).toString());
693
// cdo.endObject("c-axis");
694
((ICrystal)currentMolecule).setA(axes[0]);
695
((ICrystal)currentMolecule).setB(axes[1]);
696
((ICrystal)currentMolecule).setC(axes[2]);
698
logger.error("Could not find crystal unit cell parameters");
700
// cdo.endObject("Crystal");
701
} else if ("list".equals(name)) {
702
// cdo.endObject("MoleculeSet");
703
// FIXME: I really should check the DICTREF, but there is currently
704
// no mechanism for storing these for use with endTag() :(
705
// So, instead, for now, just see if it already has done the setting
706
// to work around duplication
707
if (currentChemModel.getMoleculeSet() != currentMoleculeSet) {
708
currentChemModel.setMoleculeSet(currentMoleculeSet);
709
currentChemSequence.addChemModel(currentChemModel);
711
} else if ("coordinate3".equals(name)) {
712
if (BUILTIN.equals("xyz3")) {
713
logger.debug("New coord3 xyz3 found: ", currentChars);
717
StringTokenizer st = new StringTokenizer(currentChars);
718
x3.add(st.nextToken());
719
y3.add(st.nextToken());
720
z3.add(st.nextToken());
721
logger.debug("coord3 x3.length: ", x3.size());
722
logger.debug("coord3 y3.length: ", y3.size());
723
logger.debug("coord3 z3.length: ", z3.size());
724
} catch (Exception exception) {
726
"CMLParsing error while setting coordinate3!");
727
logger.debug(exception);
730
logger.warn("Unknown coordinate3 BUILTIN: " + BUILTIN);
732
} else if ("string".equals(name)) {
733
if (BUILTIN.equals("elementType")) {
734
logger.debug("Element: ", cData.trim());
736
} else if (BUILTIN.equals("atomRef")) {
738
logger.debug("Bond: ref #", curRef);
741
bondARef1.add(cData.trim());
742
} else if (curRef == 2) {
743
bondARef2.add(cData.trim());
745
} else if (BUILTIN.equals("order")) {
746
logger.debug("Bond: order ", cData.trim());
747
order.add(cData.trim());
748
} else if (BUILTIN.equals("formalCharge")) {
749
// NOTE: this combination is in violation of the CML DTD!!!
750
logger.warn("formalCharge BUILTIN accepted but violating CML DTD");
751
logger.debug("Charge: ", cData.trim());
752
String charge = cData.trim();
753
if (charge.startsWith("+") && charge.length() > 1) {
754
charge = charge.substring(1);
756
formalCharges.add(charge);
758
} else if ("float".equals(name)) {
759
if (BUILTIN.equals("x3")) {
760
x3.add(cData.trim());
761
} else if (BUILTIN.equals("y3")) {
762
y3.add(cData.trim());
763
} else if (BUILTIN.equals("z3")) {
764
z3.add(cData.trim());
765
} else if (BUILTIN.equals("x2")) {
766
x2.add(cData.trim());
767
} else if (BUILTIN.equals("y2")) {
768
y2.add(cData.trim());
769
} else if (BUILTIN.equals("order")) {
770
// NOTE: this combination is in violation of the CML DTD!!!
771
order.add(cData.trim());
772
} else if (BUILTIN.equals("charge") || BUILTIN.equals("partialCharge")) {
773
partialCharges.add(cData.trim());
775
} else if ("integer".equals(name)) {
776
if (BUILTIN.equals("formalCharge")) {
777
formalCharges.add(cData.trim());
779
} else if ("coordinate2".equals(name)) {
780
if (BUILTIN.equals("xy2")) {
781
logger.debug("New coord2 xy2 found.", cData);
785
StringTokenizer st = new StringTokenizer(cData);
786
x2.add(st.nextToken());
787
y2.add(st.nextToken());
788
} catch (Exception e) {
789
notify("CMLParsing error: " + e, SYSTEMID, 175, 1);
792
} else if ("stringArray".equals(name)) {
793
if (BUILTIN.equals("id") || BUILTIN.equals("atomId")
794
|| BUILTIN.equals("atomID")) { // invalid according to CML1 DTD but found in OpenBabel 1.x output
797
boolean countAtoms = (atomCounter == 0) ? true : false;
798
StringTokenizer st = new StringTokenizer(cData);
800
while (st.hasMoreTokens()) {
801
if (countAtoms) { atomCounter++; }
802
String token = st.nextToken();
803
logger.debug("StringArray (Token): ", token);
806
} catch (Exception e) {
807
notify("CMLParsing error: " + e, SYSTEMID, 186, 1);
809
} else if (BUILTIN.equals("elementType")) {
812
boolean countAtoms = (atomCounter == 0) ? true : false;
813
StringTokenizer st = new StringTokenizer(cData);
815
while (st.hasMoreTokens()) {
816
if (countAtoms) { atomCounter++; }
817
elsym.add(st.nextToken());
819
} catch (Exception e) {
820
notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
822
} else if (BUILTIN.equals("atomRefs")) {
824
logger.debug("New atomRefs found: ", curRef);
827
boolean countBonds = (bondCounter == 0) ? true : false;
828
StringTokenizer st = new StringTokenizer(cData);
830
while (st.hasMoreTokens()) {
831
if (countBonds) { bondCounter++; }
832
String token = st.nextToken();
833
logger.debug("Token: ", token);
836
bondARef1.add(token);
837
} else if (curRef == 2) {
838
bondARef2.add(token);
841
} catch (Exception e) {
842
notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
844
} else if (BUILTIN.equals("atomRef")) {
846
logger.debug("New atomRef found: ", curRef); // this is CML1 stuff, we get things like:
849
<stringArray builtin="atomRef">a2 a2 a2 a2 a3 a3 a4 a4 a5 a6 a7 a9</stringArray>
850
<stringArray builtin="atomRef">a9 a11 a12 a13 a5 a4 a6 a9 a7 a8 a8 a10</stringArray>
851
<stringArray builtin="order">1 1 1 1 2 1 2 1 1 1 2 2</stringArray>
856
boolean countBonds = (bondCounter == 0) ? true : false;
857
StringTokenizer st = new StringTokenizer(cData);
859
while (st.hasMoreTokens()) {
860
if (countBonds) { bondCounter++; }
861
String token = st.nextToken();
862
logger.debug("Token: ", token);
865
bondARef1.add(token);
866
} else if (curRef == 2) {
867
bondARef2.add(token);
870
} catch (Exception e) {
871
notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
873
} else if (BUILTIN.equals("order")) {
874
logger.debug("New bond order found.");
878
StringTokenizer st = new StringTokenizer(cData);
880
while (st.hasMoreTokens()) {
882
String token = st.nextToken();
883
logger.debug("Token: ", token);
886
} catch (Exception e) {
887
notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
890
} else if ("integerArray".equals(name)) {
891
logger.debug("IntegerArray: builtin = ", BUILTIN);
893
if (BUILTIN.equals("formalCharge")) {
897
StringTokenizer st = new StringTokenizer(cData);
899
while (st.hasMoreTokens()) {
901
String token = st.nextToken();
902
logger.debug("Charge added: ", token);
903
formalCharges.add(token);
905
} catch (Exception e) {
906
notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
909
} else if ("scalar".equals(name)) {
910
if (xpath.endsWith("crystal", "scalar")) {
911
logger.debug("Going to set a crystal parameter: " + crystalScalar,
914
unitcellparams[crystalScalar-1] = Double.parseDouble(cData.trim());
915
} catch (NumberFormatException exception) {
916
logger.error("Content must a float: " + cData);
918
} else if (xpath.endsWith("bond", "scalar")) {
919
if (DICTREF.equals("mdl:stereo")) {
920
bondStereo.add(cData.trim());
923
} else if (xpath.endsWith("atom", "scalar")) {
924
if (DICTREF.equals("cdk:partialCharge")) {
925
partialCharges.add(cData.trim());
927
} else if (xpath.endsWith("molecule", "scalar")) {
928
if (DICTREF.equals("pdb:id")) {
929
// cdo.setObjectProperty("Molecule", DICTREF, cData);
930
currentMolecule.setProperty(new DictRef(DICTREF, cData), cData);
931
} else if (DICTREF.equals("cdk:molecularProperty")) {
932
currentMolecule.setProperty(elementTitle, cData);
935
logger.warn("Ignoring scalar: " + xpath);
937
} else if ("floatArray".equals(name)) {
938
if (BUILTIN.equals("x3")) {
942
StringTokenizer st = new StringTokenizer(cData);
944
while (st.hasMoreTokens())
945
x3.add(st.nextToken());
946
} catch (Exception e) {
947
notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
949
} else if (BUILTIN.equals("y3")) {
953
StringTokenizer st = new StringTokenizer(cData);
955
while (st.hasMoreTokens())
956
y3.add(st.nextToken());
957
} catch (Exception e) {
958
notify("CMLParsing error: " + e, SYSTEMID, 213, 1);
960
} else if (BUILTIN.equals("z3")) {
964
StringTokenizer st = new StringTokenizer(cData);
966
while (st.hasMoreTokens())
967
z3.add(st.nextToken());
968
} catch (Exception e) {
969
notify("CMLParsing error: " + e, SYSTEMID, 221, 1);
971
} else if (BUILTIN.equals("x2")) {
972
logger.debug("New floatArray found.");
976
StringTokenizer st = new StringTokenizer(cData);
978
while (st.hasMoreTokens())
979
x2.add(st.nextToken());
980
} catch (Exception e) {
981
notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
983
} else if (BUILTIN.equals("y2")) {
984
logger.debug("New floatArray found.");
988
StringTokenizer st = new StringTokenizer(cData);
990
while (st.hasMoreTokens())
991
y2.add(st.nextToken());
992
} catch (Exception e) {
993
notify("CMLParsing error: " + e, SYSTEMID, 454, 1);
995
} else if (BUILTIN.equals("partialCharge")) {
996
logger.debug("New floatArray with partial charges found.");
1000
StringTokenizer st = new StringTokenizer(cData);
1002
while (st.hasMoreTokens())
1003
partialCharges.add(st.nextToken());
1004
} catch (Exception e) {
1005
notify("CMLParsing error: " + e, SYSTEMID, 462, 1);
1008
} else if ("basic".equals(name)) {
1009
// assuming this is the child element of <identifier>
1011
} else if ("name".equals(name)) {
1012
if (xpath.endsWith("molecule", "name")) {
1013
if (DICTREF.length() > 0) {
1014
// cdo.setObjectProperty("Molecule", DICTREF, cData);
1016
currentMolecule.setProperty(new DictRef(DICTREF, cData), cData);
1018
// cdo.setObjectProperty("Molecule", "Name", cData);
1019
currentMolecule.setProperty(CDKConstants.TITLE, cData);
1023
logger.warn("Skipping element: " + name);
1031
public void characterData(CMLStack xpath, char[] ch, int start, int length) {
1032
currentChars = currentChars + new String(ch, start, length);
1033
logger.debug("CD: ", currentChars);
1036
protected void notify(String message, String systemId, int line,
1038
logger.debug("Message: ", message);
1039
logger.debug("SystemId: ", systemId);
1040
logger.debug("Line: ", line);
1041
logger.debug("Column: ", column);
1044
protected void storeData() {
1045
if (inchi != null) {
1046
// cdo.setObjectProperty("Molecule", "inchi", inchi);
1047
currentMolecule.setProperty(CDKConstants.INCHI, inchi);
1053
/**
 * Creates the atoms of the current molecule from the per-atom lists collected
 * during parsing and adds them to <code>currentMolecule</code>.
 *
 * <p>The size of each per-atom list (<code>elid</code>, <code>elsym</code>,
 * <code>eltitles</code>, <code>x3/y3/z3</code>, <code>xfract/yfract/zfract</code>,
 * <code>x2/y2</code>, <code>formalCharges</code>, <code>partialCharges</code>,
 * <code>hCounts</code>, <code>spinMultiplicities</code>, <code>occupancies</code>,
 * <code>atomDictRefs</code>, <code>isotope</code>) is first compared with
 * <code>atomCounter</code>. Then one atom is built for every index from 0 to
 * <code>atomCounter - 1</code>, registered in <code>atomEnumeration</code> under its
 * id, and added to <code>currentMolecule</code>.
 */
protected void storeAtomData() {
1054
logger.debug("No atoms: ", atomCounter);
1055
if (atomCounter == 0) {
1059
// One flag per kind of per-atom information; all start out false.
boolean hasID = false;
1060
boolean has3D = false;
1061
boolean has3Dfract = false;
1062
boolean has2D = false;
1063
boolean hasFormalCharge = false;
1064
boolean hasPartialCharge = false;
1065
boolean hasHCounts = false;
1066
boolean hasSymbols = false;
1067
boolean hasTitles = false;
1068
boolean hasIsotopes = false;
1069
boolean hasDictRefs = false;
1070
boolean hasSpinMultiplicities = false;
1071
boolean hasOccupancies = false;
1073
// Each list is checked by comparing its size with atomCounter; a debug
// message with the differing sizes accompanies every check.
if (elid.size() == atomCounter) {
1076
logger.debug("No atom ids: " + elid.size(), " != " + atomCounter);
1079
if (elsym.size() == atomCounter) {
1083
"No atom symbols: " + elsym.size(), " != " + atomCounter);
1086
if (eltitles.size() == atomCounter) {
1090
"No atom titles: " + eltitles.size(), " != " + atomCounter);
1093
// 3D coordinates are checked as a group: x3, y3 and z3 must all match.
if ((x3.size() == atomCounter) && (y3.size() == atomCounter) &&
1094
(z3.size() == atomCounter)) {
1098
"No 3D info: " + x3.size(), " " + y3.size(), " " +
1099
z3.size(), " != " + atomCounter);
1102
// Same group check for the fractional coordinates.
if ((xfract.size() == atomCounter) && (yfract.size() == atomCounter) &&
1103
(zfract.size() == atomCounter)) {
1107
"No 3D fractional info: " + xfract.size(), " " + yfract.size(), " " +
1108
zfract.size(), " != " + atomCounter);
1111
// 2D coordinates need both x2 and y2.
if ((x2.size() == atomCounter) && (y2.size() == atomCounter)) {
1115
"No 2D info: " + x2.size(), " " + y2.size(), " != " +
1119
if (formalCharges.size() == atomCounter) {
1120
hasFormalCharge = true;
1123
"No formal Charge info: " + formalCharges.size(),
1124
" != " + atomCounter);
1127
if (partialCharges.size() == atomCounter) {
1128
hasPartialCharge = true;
1131
"No partial Charge info: " + partialCharges.size(),
1132
" != " + atomCounter);
1135
if (hCounts.size() == atomCounter) {
1139
"No hydrogen Count info: " + hCounts.size(),
1140
" != " + atomCounter);
1143
if (spinMultiplicities.size() == atomCounter) {
1144
hasSpinMultiplicities = true;
1147
"No spinMultiplicity info: " + spinMultiplicities.size(),
1148
" != " + atomCounter);
1151
if (occupancies.size() == atomCounter) {
1152
hasOccupancies = true;
1155
"No occupancy info: " + occupancies.size(),
1156
" != " + atomCounter);
1159
if (atomDictRefs.size() == atomCounter) {
1163
"No dictRef info: " + atomDictRefs.size(),
1164
" != " + atomCounter);
1167
if (isotope.size() == atomCounter) {
1171
"No isotope info: " + isotope.size(),
1172
" != " + atomCounter);
1175
// Build one atom per index and fill it from the i-th entry of each list.
for (int i = 0; i < atomCounter; i++) {
1176
logger.info("Storing atom: ", i);
1177
// cdo.startObject("Atom");
1178
// The atom is created with the placeholder symbol "H"; the symbol from
// elsym is applied further down via setSymbol(symbol).
currentAtom = currentChemFile.getBuilder().newAtom("H");
1179
// NOTE(review): this logs atomCounter (the total), not the loop index i.
logger.debug("Atom # " + atomCounter);
1181
// cdo.setObjectProperty("Atom", "id", (String)elid.get(i));
1182
logger.debug("id: ", (String)elid.get(i));
1183
currentAtom.setID((String)elid.get(i));
1184
// Register the atom under its id so that it can be looked up by id later.
atomEnumeration.put((String)elid.get(i), currentAtom);
1188
String symbol = (String)elsym.get(i);
1189
// "Du" / "Dummy" atoms become pseudo atoms labelled with their title.
if (symbol.equals("Du") || symbol.equals("Dummy")) {
1190
// cdo.setObjectProperty("PseudoAtom", "label", (String)eltitles.get(i));
1191
if (!(currentAtom instanceof IPseudoAtom)) {
1192
currentAtom = currentChemFile.getBuilder().newPseudoAtom(currentAtom);
1194
// currentAtom now refers to a new object, so the id mapping is refreshed.
atomEnumeration.put((String)elid.get(i), currentAtom);
1196
((IPseudoAtom)currentAtom).setLabel((String)eltitles.get(i));
1198
// cdo.setObjectProperty("Atom", "title", (String)eltitles.get(i));
1200
if (eltitles.get(i) != null)
1201
currentAtom.setProperty(CDKConstants.TITLE, (String)eltitles.get(i));
1204
// cdo.setObjectProperty("Atom", "title", (String)eltitles.get(i));
1206
if (eltitles.get(i) != null)
1207
currentAtom.setProperty(CDKConstants.TITLE, (String)eltitles.get(i));
1211
// store optional atom properties
1213
String symbol = (String)elsym.get(i);
1214
if (symbol.equals("Du") || symbol.equals("Dummy")) {
1217
// cdo.setObjectProperty("Atom", "type", symbol);
1218
// An "R" symbol also turns the atom into a pseudo atom.
if (symbol.equals("R") && !(currentAtom instanceof IPseudoAtom)) {
1219
currentAtom = currentChemFile.getBuilder().newPseudoAtom(currentAtom);
1221
atomEnumeration.put((String)elid.get(i), currentAtom);
1223
currentAtom.setSymbol(symbol);
1225
IsotopeFactory.getInstance(currentAtom.getBuilder()).configure(currentAtom);
1226
}catch(Exception ex){
1227
// NOTE(review): the caught exception is not included in the warning.
logger.warn("Could not configure atom");
1232
// cdo.setObjectProperty("Atom", "x3", (String)x3.get(i));
1233
// cdo.setObjectProperty("Atom", "y3", (String)y3.get(i));
1234
// cdo.setObjectProperty("Atom", "z3", (String)z3.get(i));
1235
// All three components must be present for this atom before they are parsed.
if (x3.get(i) != null &&
1236
y3.get(i) != null &&
1237
z3.get(i) != null) {
1238
currentAtom.setPoint3d(
1240
Double.parseDouble((String)x3.get(i)),
1241
Double.parseDouble((String)y3.get(i)),
1242
Double.parseDouble((String)z3.get(i))
1249
// ok, need to convert fractional into Euclidean coordinates
// NOTE(review): the statement below stores the fractional values as given;
// no conversion is visible at this point.
1250
// cdo.setObjectProperty("Atom", "xFract", (String)xfract.get(i));
1251
// cdo.setObjectProperty("Atom", "yFract", (String)yfract.get(i));
1252
// cdo.setObjectProperty("Atom", "zFract", (String)zfract.get(i));
1253
currentAtom.setFractionalPoint3d(
1255
Double.parseDouble((String)xfract.get(i)),
1256
Double.parseDouble((String)yfract.get(i)),
1257
Double.parseDouble((String)zfract.get(i))
1262
if (hasFormalCharge) {
1263
// cdo.setObjectProperty("Atom", "formalCharge",
1264
// (String)formalCharges.get(i));
1265
currentAtom.setFormalCharge(Integer.parseInt((String)formalCharges.get(i)));
1268
if (hasPartialCharge) {
1269
logger.debug("Storing partial atomic charge...");
1270
// cdo.setObjectProperty("Atom", "partialCharge",
1271
// (String)partialCharges.get(i));
1272
currentAtom.setCharge(Double.parseDouble((String)partialCharges.get(i)));
1276
// cdo.setObjectProperty("Atom", "hydrogenCount", (String)hCounts.get(i));
1277
// FIXME: the hCount in CML is the total of implicit *and* explicit
1278
currentAtom.setHydrogenCount(Integer.parseInt((String)hCounts.get(i)));
1282
if (x2.get(i) != null && y2.get(i) != null) {
1283
// cdo.setObjectProperty("Atom", "x2", (String)x2.get(i));
1284
// cdo.setObjectProperty("Atom", "y2", (String)y2.get(i));
1285
currentAtom.setPoint2d(
1287
Double.parseDouble((String)x2.get(i)),
1288
Double.parseDouble((String)y2.get(i))
1295
// cdo.setObjectProperty("Atom", "dictRef", (String)atomDictRefs.get(i));
1296
if (atomDictRefs.get(i) != null)
1297
currentAtom.setProperty("org.openscience.cdk.dict", (String)atomDictRefs.get(i));
1300
if (hasSpinMultiplicities && spinMultiplicities.get(i) != null) {
1301
// cdo.setObjectProperty("Atom", "spinMultiplicity", (String)spinMultiplicities.get(i));
1302
// Multiplicity minus one single electrons are attached to the atom.
int unpairedElectrons = Integer.parseInt((String)spinMultiplicities.get(i))-1;
1303
for (int sm=0; sm<unpairedElectrons; sm++) {
1304
currentMolecule.addSingleElectron(currentChemFile.getBuilder().newSingleElectron(currentAtom));
1308
if (hasOccupancies && occupancies.get(i) != null) {
1309
// cdo.setObjectProperty("Atom", "occupanciy", (String)occupancies.get(i));
1310
// FIXME: this has no ChemFileCDO equivalent, not even if spelled correctly
1314
// cdo.setObjectProperty("Atom", "massNumber", (String)isotope.get(i));
1315
if (isotope.get(i) != null)
1316
// The value is parsed as a double and then truncated to int by the cast.
currentAtom.setMassNumber((int)Double.parseDouble((String)isotope.get(i)));
1319
// cdo.endObject("Atom");
1320
currentMolecule.addAtom(currentAtom);
1322
if (elid.size() > 0) {
1323
// assume this is the current working list
1329
/**
 * Creates the bonds of the current molecule from the per-bond lists collected
 * during parsing and adds them to <code>currentMolecule</code>.
 *
 * <p>Bonds are only built when both atom-reference lists (<code>bondARef1</code>,
 * <code>bondARef2</code>) have exactly <code>bondCounter</code> entries. The lists
 * <code>order</code>, <code>bondid</code>, <code>bondStereo</code> and
 * <code>bondAromaticity</code> are walked in parallel and each is consulted only
 * while it still has entries.
 */
protected void storeBondData() {
1331
"Testing a1,a2,stereo,order = count: " + bondARef1.size(), "," +
1332
bondARef2.size(), "," + bondStereo.size(), "," + order.size(), "=" +
1335
if ((bondARef1.size() == bondCounter) &&
1336
(bondARef2.size() == bondCounter)) {
1337
logger.debug("About to add bond info...");
1339
// One iterator per list; all are advanced together, once per bond.
Iterator orders = order.iterator();
1340
Iterator ids = bondid.iterator();
1341
Iterator bar1s = bondARef1.iterator();
1342
Iterator bar2s = bondARef2.iterator();
1343
Iterator stereos = bondStereo.iterator();
1344
Iterator aroms = bondAromaticity.iterator();
1346
// bondARef1 drives the loop; bondARef2 has the same size (checked above).
while (bar1s.hasNext()) {
1347
// cdo.startObject("Bond");
1348
// if (ids.hasNext()) {
1349
// cdo.setObjectProperty("Bond", "id", (String)ids.next());
1351
// cdo.setObjectProperty("Bond", "atom1",
1352
// new Integer(bondElid.indexOf(
1353
// (String)bar1s.next())).toString());
1354
// cdo.setObjectProperty("Bond", "atom2",
1355
// new Integer(bondElid.indexOf(
1356
// (String)bar2s.next())).toString());
1357
// Resolve both atom references through the id map.
// NOTE(review): no check is visible here for an id missing from atomEnumeration.
IAtom a1 = (IAtom)atomEnumeration.get((String)bar1s.next());
1358
IAtom a2 = (IAtom)atomEnumeration.get((String)bar2s.next());
1359
currentBond = currentChemFile.getBuilder().newBond(a1, a2);
1360
if (ids.hasNext()) {
1361
currentBond.setID((String)ids.next());
1364
if (orders.hasNext()) {
1365
String bondOrder = (String)orders.next();
1367
// Letter codes S, D, T and A are mapped explicitly; any other value is
// parsed as a number.
if ("S".equals(bondOrder)) {
1368
// cdo.setObjectProperty("Bond", "order", "1");
1369
currentBond.setOrder(CDKConstants.BONDORDER_SINGLE);
1370
} else if ("D".equals(bondOrder)) {
1371
// cdo.setObjectProperty("Bond", "order", "2");
1372
currentBond.setOrder(CDKConstants.BONDORDER_DOUBLE);
1373
} else if ("T".equals(bondOrder)) {
1374
// cdo.setObjectProperty("Bond", "order", "3");
1375
currentBond.setOrder(CDKConstants.BONDORDER_TRIPLE);
1376
} else if ("A".equals(bondOrder)) {
1377
// cdo.setObjectProperty("Bond", "order", "1.5");
1378
// "A" is stored as a single bond carrying the aromatic flag.
currentBond.setOrder(CDKConstants.BONDORDER_SINGLE);
1379
currentBond.setFlag(CDKConstants.ISAROMATIC, true);
1381
// cdo.setObjectProperty("Bond", "order", bondOrder);
1382
currentBond.setOrder(Double.parseDouble(bondOrder));
1386
if (stereos.hasNext()) {
1387
// cdo.setObjectProperty("Bond", "stereo",
1388
// (String)stereos.next());
1389
String nextStereo = (String)stereos.next();
1390
// "H" maps to STEREO_BOND_DOWN and "W" to STEREO_BOND_UP; any other
// non-null value is only reported with a warning.
if ("H".equals(nextStereo)) {
1391
currentBond.setStereo(CDKConstants.STEREO_BOND_DOWN);
1392
} else if ("W".equals(nextStereo)) {
1393
currentBond.setStereo(CDKConstants.STEREO_BOND_UP);
1394
} else if (nextStereo != null){
1395
logger.warn("Cannot interpret stereo information: " + nextStereo);
1399
if (aroms.hasNext()) {
1400
Object nextArom = aroms.next();
1401
// NOTE(review): "== Boolean.TRUE" is an identity comparison; a Boolean
// created with "new Boolean(true)" would not match -- confirm how the
// entries of bondAromaticity are produced.
if (nextArom != null && nextArom == Boolean.TRUE) {
1402
currentBond.setFlag(CDKConstants.ISAROMATIC, true);
1406
// cdo.endObject("Bond");
1407
currentMolecule.addBond(currentBond);
1413
protected int addArrayElementsTo(List toAddto, String array) {
1414
StringTokenizer tokenizer = new StringTokenizer(array);
1416
while (tokenizer.hasMoreElements()) {
1417
toAddto.add(tokenizer.nextToken());