~ubuntu-branches/ubuntu/maverick/cdk/maverick

« back to all changes in this revision

Viewing changes to src/org/openscience/cdk/io/cml/CMLCoreModule.java

  • Committer: Bazaar Package Importer
  • Author(s): Paul Cager
  • Date: 2008-04-09 21:17:53 UTC
  • Revision ID: james.westby@ubuntu.com-20080409211753-46lmjw5z8mx5pd8d
Tags: upstream-1.0.2
Import upstream version 1.0.2

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* $Revision: 8301 $ $Author: egonw $ $Date: 2007-05-05 14:00:36 +0200 (Sat, 05 May 2007) $
 
2
 *
 
3
 * Copyright (C) 1997-2007  Egon Willighagen <egonw@users.sf.net>
 
4
 *
 
5
 * Contact: cdk-devel@lists.sourceforge.net
 
6
 *
 
7
 * This program is free software; you can redistribute it and/or
 
8
 * modify it under the terms of the GNU Lesser General Public License
 
9
 * as published by the Free Software Foundation; either version 2.1
 
10
 * of the License, or (at your option) any later version.
 
11
 * All we ask is that proper credit is given for our work, which includes
 
12
 * - but is not limited to - adding the above copyright notice to the beginning
 
13
 * of your source code files, and to any copyright notice that you may distribute
 
14
 * with programs based on this work.
 
15
 *
 
16
 * This program is distributed in the hope that it will be useful,
 
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
19
 * GNU Lesser General Public License for more details.
 
20
 *
 
21
 * You should have received a copy of the GNU Lesser General Public License
 
22
 * along with this program; if not, write to the Free Software
 
23
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 
24
 *
 
25
 */
 
26
package org.openscience.cdk.io.cml;
 
27
 
 
28
import java.util.ArrayList;
 
29
import java.util.HashMap;
 
30
import java.util.Iterator;
 
31
import java.util.List;
 
32
import java.util.Map;
 
33
import java.util.StringTokenizer;
 
34
 
 
35
import javax.vecmath.Point2d;
 
36
import javax.vecmath.Point3d;
 
37
import javax.vecmath.Vector3d;
 
38
 
 
39
import org.openscience.cdk.CDKConstants;
 
40
import org.openscience.cdk.config.IsotopeFactory;
 
41
import org.openscience.cdk.dict.DictRef;
 
42
import org.openscience.cdk.geometry.CrystalGeometryTools;
 
43
import org.openscience.cdk.interfaces.IAtom;
 
44
import org.openscience.cdk.interfaces.IAtomContainer;
 
45
import org.openscience.cdk.interfaces.IBond;
 
46
import org.openscience.cdk.interfaces.IChemFile;
 
47
import org.openscience.cdk.interfaces.IChemModel;
 
48
import org.openscience.cdk.interfaces.IChemSequence;
 
49
import org.openscience.cdk.interfaces.ICrystal;
 
50
import org.openscience.cdk.interfaces.IMolecule;
 
51
import org.openscience.cdk.interfaces.IMoleculeSet;
 
52
import org.openscience.cdk.interfaces.IMonomer;
 
53
import org.openscience.cdk.interfaces.IPseudoAtom;
 
54
import org.openscience.cdk.interfaces.IReaction;
 
55
import org.openscience.cdk.interfaces.IReactionSet;
 
56
import org.openscience.cdk.interfaces.IStrand;
 
57
import org.openscience.cdk.tools.LoggingTool;
 
58
import org.xml.sax.Attributes;
 
59
 
 
60
/**
 
61
 * Core CML 1.x and 2.0 elements are parsed by this class.
 
62
 *
 
63
 * <p>Please file a bug report if this parser fails to parse
 
64
 * a certain element or attribute value in a valid CML document.
 
65
 *
 
66
 * @cdk.module io
 
67
 *
 
68
 * @author Egon Willighagen <egonw@sci.kun.nl>
 
69
 **/
 
70
public class CMLCoreModule implements ICMLModule {
 
71
 
 
72
    protected org.openscience.cdk.tools.LoggingTool logger;
 
73
    protected final String SYSTEMID = "CML-1999-05-15";
 
74
//    protected IChemicalDocumentObject cdo;
 
75
 
 
76
    // data model to store things into
 
77
    protected IChemFile currentChemFile;
 
78
        
 
79
    protected IAtomContainer currentMolecule;
 
80
    protected IMoleculeSet currentMoleculeSet;
 
81
    protected IChemModel currentChemModel;
 
82
    protected IChemSequence currentChemSequence;
 
83
    protected IReactionSet currentReactionSet;
 
84
    protected IReaction currentReaction;
 
85
    protected IAtom currentAtom;
 
86
    protected IBond currentBond;
 
87
    protected IStrand currentStrand;
 
88
    protected IMonomer currentMonomer;
 
89
    protected Map atomEnumeration;
 
90
    
 
91
    // helper fields    
 
92
    protected int atomCounter;
 
93
    protected List elsym;
 
94
    protected List eltitles;
 
95
    protected List elid;
 
96
    protected List formalCharges;
 
97
    protected List partialCharges;
 
98
    protected List isotope;
 
99
    protected List x3;
 
100
    protected List y3;
 
101
    protected List z3;
 
102
    protected List x2;
 
103
    protected List y2;
 
104
    protected List xfract;
 
105
    protected List yfract;
 
106
    protected List zfract;
 
107
    protected List hCounts;
 
108
    protected List atomParities;
 
109
    protected List atomDictRefs;
 
110
    protected List spinMultiplicities;
 
111
    protected List occupancies;
 
112
 
 
113
    protected int bondCounter;
 
114
    protected List bondid;
 
115
    protected List bondARef1;
 
116
    protected List bondARef2;
 
117
    protected List order;
 
118
    protected List bondStereo;
 
119
    protected List bondDictRefs;
 
120
    protected List bondElid;
 
121
    protected List bondAromaticity;
 
122
    protected boolean stereoGiven;
 
123
    protected String inchi;
 
124
    protected int curRef;
 
125
    protected int CurrentElement;
 
126
    protected String BUILTIN;
 
127
    protected String DICTREF;
 
128
    protected String elementTitle;
 
129
    protected String currentChars;
 
130
    
 
131
    protected double[] unitcellparams;
 
132
    protected int crystalScalar;
 
133
    
 
134
//    private Vector3d aAxis;
 
135
//    private Vector3d bAxis;
 
136
//    private Vector3d cAxis;
 
137
    boolean cartesianAxesSet = false;
 
138
    
 
139
    public CMLCoreModule(IChemFile chemFile) {
 
140
        logger = new LoggingTool(this);
 
141
                this.currentChemFile = chemFile;
 
142
    }
 
143
    
 
144
    public CMLCoreModule(ICMLModule conv) {
 
145
        logger = new LoggingTool(this);
 
146
        inherit(conv);
 
147
    }
 
148
 
 
149
    public void inherit(ICMLModule convention) {
 
150
        if (convention instanceof CMLCoreModule) {
 
151
            CMLCoreModule conv = (CMLCoreModule)convention;
 
152
            
 
153
            // copy the data model
 
154
            this.currentChemFile = conv.currentChemFile;
 
155
            this.currentMolecule = conv.currentMolecule;
 
156
            this.currentMoleculeSet = conv.currentMoleculeSet;
 
157
            this.currentChemModel = conv.currentChemModel;
 
158
            this.currentChemSequence = conv.currentChemSequence;
 
159
            this.currentReactionSet = conv.currentReactionSet;
 
160
            this.currentReaction = conv.currentReaction;
 
161
            this.currentAtom = conv.currentAtom;
 
162
            this.currentStrand = conv.currentStrand;
 
163
            this.currentMonomer = conv.currentMonomer;
 
164
            this.atomEnumeration = conv.atomEnumeration;
 
165
            
 
166
            // copy the intermediate fields
 
167
            this.logger = conv.logger;
 
168
            this.BUILTIN = conv.BUILTIN;
 
169
            this.atomCounter = conv.atomCounter;
 
170
            this.elsym = conv.elsym;
 
171
            this.eltitles = conv.eltitles;
 
172
            this.elid = conv.elid;
 
173
            this.formalCharges = conv.formalCharges;
 
174
            this.partialCharges = conv.partialCharges;
 
175
            this.isotope = conv.isotope;
 
176
            this.x3 = conv.x3;
 
177
            this.y3 = conv.y3;
 
178
            this.z3 = conv.z3;
 
179
            this.x2 = conv.x2;
 
180
            this.y2 = conv.y2;
 
181
            this.xfract = conv.xfract;
 
182
            this.yfract = conv.yfract;
 
183
            this.zfract = conv.zfract;
 
184
            this.hCounts = conv.hCounts;
 
185
            this.atomParities = conv.atomParities;
 
186
            this.atomDictRefs = conv.atomDictRefs;
 
187
            this.spinMultiplicities = conv.spinMultiplicities;
 
188
            this.occupancies = conv.occupancies;
 
189
            this.bondCounter = conv.bondCounter;
 
190
            this.bondid = conv.bondid;
 
191
            this.bondARef1 = conv.bondARef1;
 
192
            this.bondARef2 = conv.bondARef2;
 
193
            this.order = conv.order;
 
194
            this.bondStereo = conv.bondStereo;
 
195
            this.bondDictRefs = conv.bondDictRefs;
 
196
            this.bondAromaticity = conv.bondAromaticity;
 
197
            this.curRef = conv.curRef;
 
198
            this.unitcellparams = conv.unitcellparams;
 
199
            this.inchi = conv.inchi;
 
200
        } else {
 
201
            logger.warn("Cannot inherit information from module: ", convention.getClass().getName());
 
202
        }
 
203
    }
 
204
 
 
205
    public IChemFile returnChemFile() {
 
206
        return currentChemFile;
 
207
    }
 
208
    
 
209
    /**
 
210
     * Clean all data about parsed data.
 
211
     */
 
212
    protected void newMolecule() {
 
213
        newMoleculeData();
 
214
        newAtomData();
 
215
        newBondData();
 
216
        newCrystalData();
 
217
    }
 
218
    
 
219
    /**
 
220
     * Clean all data about the molecule itself.
 
221
     */
 
222
    protected void newMoleculeData() {
 
223
        this.inchi = null;
 
224
    }
 
225
 
 
226
    /**
 
227
     * Clean all data about read atoms.
 
228
     */
 
229
    protected void newAtomData() {
 
230
        atomCounter = 0;
 
231
        elsym = new ArrayList();
 
232
        elid = new ArrayList();
 
233
        eltitles = new ArrayList();
 
234
        formalCharges = new ArrayList();
 
235
        partialCharges = new ArrayList();
 
236
        isotope = new ArrayList();
 
237
        x3 = new ArrayList();
 
238
        y3 = new ArrayList();
 
239
        z3 = new ArrayList();
 
240
        x2 = new ArrayList();
 
241
        y2 = new ArrayList();
 
242
        xfract = new ArrayList();
 
243
        yfract = new ArrayList();
 
244
        zfract = new ArrayList();
 
245
        hCounts = new ArrayList();
 
246
        atomParities = new ArrayList();
 
247
        atomDictRefs = new ArrayList();
 
248
        spinMultiplicities = new ArrayList();
 
249
        occupancies = new ArrayList();
 
250
    }
 
251
 
 
252
    /**
 
253
     * Clean all data about read bonds.
 
254
     */
 
255
    protected void newBondData() {
 
256
        bondCounter = 0;
 
257
        bondid = new ArrayList();
 
258
        bondARef1 = new ArrayList();
 
259
        bondARef2 = new ArrayList();
 
260
        order = new ArrayList();
 
261
        bondStereo = new ArrayList();
 
262
        bondDictRefs = new ArrayList();
 
263
        bondElid = new ArrayList();
 
264
        bondAromaticity = new ArrayList();
 
265
    }
 
266
 
 
267
    /**
 
268
     * Clean all data about read bonds.
 
269
     */
 
270
    protected void newCrystalData() {
 
271
        unitcellparams = new double[6];
 
272
        cartesianAxesSet = false;
 
273
        crystalScalar = 0;
 
274
//        aAxis = new Vector3d();
 
275
//        bAxis = new Vector3d();
 
276
//        cAxis = new Vector3d();
 
277
    }
 
278
 
 
279
    public void startDocument() {
 
280
        logger.info("Start XML Doc");
 
281
        // cdo.startDocument();
 
282
        currentChemSequence = currentChemFile.getBuilder().newChemSequence();
 
283
        currentChemModel = currentChemFile.getBuilder().newChemModel();
 
284
        currentMoleculeSet = currentChemFile.getBuilder().newMoleculeSet();
 
285
        currentMolecule = currentChemFile.getBuilder().newMolecule();
 
286
        atomEnumeration = new HashMap();
 
287
        
 
288
        newMolecule();
 
289
        BUILTIN = "";
 
290
        curRef = 0;
 
291
    }
 
292
    
 
293
    public void endDocument() {
 
294
//        cdo.endDocument();
 
295
        if (currentReactionSet != null && currentReactionSet.getReactionCount() == 0
 
296
                        && currentReaction != null) {
 
297
                logger.debug("Adding reaction to ReactionSet");
 
298
                currentReactionSet.addReaction(currentReaction);
 
299
        }
 
300
        if (currentReactionSet != null && currentChemModel.getReactionSet() == null) {
 
301
                logger.debug("Adding SOR to ChemModel");
 
302
                currentChemModel.setReactionSet(currentReactionSet);
 
303
        }
 
304
        if (currentMoleculeSet != null && currentMoleculeSet.getMoleculeCount() != 0) {
 
305
                logger.debug("Adding reaction to MoleculeSet");
 
306
                currentChemModel.setMoleculeSet(currentMoleculeSet);
 
307
        }
 
308
        if (currentChemSequence.getChemModelCount() == 0) {
 
309
                logger.debug("Adding ChemModel to ChemSequence");
 
310
                currentChemSequence.addChemModel(currentChemModel);
 
311
        }
 
312
        if (currentChemFile.getChemSequenceCount() == 0) {
 
313
                // assume there is one non-animation ChemSequence
 
314
//              addChemSequence(currentChemSequence);
 
315
                currentChemFile.addChemSequence(currentChemSequence);
 
316
        }
 
317
        
 
318
        logger.info("End XML Doc");
 
319
    }
 
320
    
 
321
    public void startElement(CMLStack xpath, String uri, String local, String raw, 
 
322
                              Attributes atts) {
 
323
        String name = local;
 
324
        logger.debug("StartElement");
 
325
        currentChars = "";
 
326
        
 
327
        BUILTIN = "";
 
328
        DICTREF = "";
 
329
 
 
330
        for (int i=0; i<atts.getLength(); i++) {
 
331
            String qname = atts.getQName(i);
 
332
            if (qname.equals("builtin")) {
 
333
                BUILTIN = atts.getValue(i);
 
334
                logger.debug(name, "->BUILTIN found: ", atts.getValue(i));
 
335
            } else if (qname.equals("dictRef")) {
 
336
                DICTREF = atts.getValue(i);
 
337
                logger.debug(name, "->DICTREF found: ", atts.getValue(i));
 
338
            } else if (qname.equals("title")) {
 
339
                elementTitle = atts.getValue(i);
 
340
                logger.debug(name, "->TITLE found: ", atts.getValue(i));
 
341
            } else {
 
342
                logger.debug("Qname: ", qname);
 
343
            }
 
344
        }
 
345
        
 
346
        if ("atom".equals(name)) {
 
347
            atomCounter++;
 
348
            for (int i = 0; i < atts.getLength(); i++) {
 
349
                
 
350
                String att = atts.getQName(i);
 
351
                String value = atts.getValue(i);
 
352
                
 
353
                if (att.equals("id")) { // this is supported in CML 1.x
 
354
                    elid.add(value);
 
355
                } // this is supported in CML 2.0 
 
356
                else if (att.equals("elementType")) {
 
357
                    elsym.add(value);
 
358
                } // this is supported in CML 2.0 
 
359
                else if (att.equals("title")) {
 
360
                    eltitles.add(value);
 
361
                } // this is supported in CML 2.0 
 
362
                else if (att.equals("x2")) {
 
363
                    x2.add(value);
 
364
                } // this is supported in CML 2.0 
 
365
                else if (att.equals("xy2")) {
 
366
                    StringTokenizer tokenizer = new StringTokenizer(value);
 
367
                    x2.add(tokenizer.nextToken());
 
368
                    y2.add(tokenizer.nextToken());
 
369
                } // this is supported in CML 2.0 
 
370
                else if (att.equals("xyzFract")) {
 
371
                    StringTokenizer tokenizer = new StringTokenizer(value);
 
372
                    xfract.add(tokenizer.nextToken());
 
373
                    yfract.add(tokenizer.nextToken());
 
374
                    zfract.add(tokenizer.nextToken());
 
375
                } // this is supported in CML 2.0 
 
376
                else if (att.equals("xyz3")) {
 
377
                    StringTokenizer tokenizer = new StringTokenizer(value);
 
378
                    x3.add(tokenizer.nextToken());
 
379
                    y3.add(tokenizer.nextToken());
 
380
                    z3.add(tokenizer.nextToken());
 
381
                } // this is supported in CML 2.0 
 
382
                else if (att.equals("y2")) {
 
383
                    y2.add(value);
 
384
                } // this is supported in CML 2.0 
 
385
                else if (att.equals("x3")) {
 
386
                    x3.add(value);
 
387
                } // this is supported in CML 2.0 
 
388
                else if (att.equals("y3")) {
 
389
                    y3.add(value);
 
390
                } // this is supported in CML 2.0 
 
391
                else if (att.equals("z3")) {
 
392
                    z3.add(value);
 
393
                } // this is supported in CML 2.0 
 
394
                else if (att.equals("xFract")) {
 
395
                    xfract.add(value);
 
396
                } // this is supported in CML 2.0 
 
397
                else if (att.equals("yFract")) {
 
398
                    yfract.add(value);
 
399
                } // this is supported in CML 2.0 
 
400
                else if (att.equals("zFract")) {
 
401
                    zfract.add(value);
 
402
                } // this is supported in CML 2.0 
 
403
                else if (att.equals("formalCharge")) {
 
404
                    formalCharges.add(value);
 
405
                } // this is supported in CML 2.0 
 
406
                else if (att.equals("hydrogenCount")) {
 
407
                    hCounts.add(value);
 
408
                }
 
409
                else if (att.equals("isotope")) {
 
410
                    isotope.add(value);
 
411
                }
 
412
                else if (att.equals("dictRef")) {                       
 
413
                    logger.debug("ocupaccy: "+value);
 
414
                    atomDictRefs.add(value);
 
415
                } 
 
416
                else if (att.equals("spinMultiplicity")) {
 
417
                    spinMultiplicities.add(value);
 
418
                }
 
419
                else if (att.equals("occupancy")) {
 
420
                    occupancies.add(value);
 
421
                } 
 
422
                 
 
423
                else {
 
424
                    logger.warn("Unparsed attribute: " + att);
 
425
                }
 
426
            }
 
427
        } else if ("atomArray".equals(name) &&
 
428
                           !xpath.endsWith("formula", "atomArray")) {
 
429
            boolean atomsCounted = false;
 
430
            for (int i = 0; i < atts.getLength(); i++) {
 
431
                String att = atts.getQName(i);
 
432
                int count = 0;
 
433
                if (att.equals("atomID")) {
 
434
                    count = addArrayElementsTo(elid, atts.getValue(i));
 
435
                } else if (att.equals("elementType")) {
 
436
                    count = addArrayElementsTo(elsym, atts.getValue(i));
 
437
                } else if (att.equals("x2")) {
 
438
                    count = addArrayElementsTo(x2, atts.getValue(i));
 
439
                } else if (att.equals("y2")) {
 
440
                    count = addArrayElementsTo(y2, atts.getValue(i));
 
441
                } else if (att.equals("x3")) {
 
442
                    count = addArrayElementsTo(x3, atts.getValue(i));
 
443
                } else if (att.equals("y3")) {
 
444
                    count = addArrayElementsTo(y3, atts.getValue(i));
 
445
                } else if (att.equals("z3")) {
 
446
                    count = addArrayElementsTo(z3, atts.getValue(i));
 
447
                } else if (att.equals("xFract")) {
 
448
                    count = addArrayElementsTo(xfract, atts.getValue(i));
 
449
                } else if (att.equals("yFract")) {
 
450
                    count = addArrayElementsTo(yfract, atts.getValue(i));
 
451
                } else if (att.equals("zFract")) {
 
452
                    count = addArrayElementsTo(zfract, atts.getValue(i));
 
453
                } else {
 
454
                    logger.warn("Unparsed attribute: " + att);
 
455
                }
 
456
                if (!atomsCounted) {
 
457
                    atomCounter += count;
 
458
                    atomsCounted = true;
 
459
                }
 
460
            }
 
461
        } else if ("bond".equals(name)) {
 
462
            bondCounter++;
 
463
            for (int i = 0; i < atts.getLength(); i++) {
 
464
                String att = atts.getQName(i);
 
465
                logger.debug("B2 ", att, "=", atts.getValue(i));
 
466
                
 
467
                if (att.equals("id")) {
 
468
                    bondid.add(atts.getValue(i));
 
469
                    logger.debug("B3 ", bondid);
 
470
                } else if (att.equals("atomRefs") || // this is CML 1.x support
 
471
                           att.equals("atomRefs2")) { // this is CML 2.0 support
 
472
                    
 
473
                    // expect exactly two references
 
474
                    try {
 
475
                        StringTokenizer st = new StringTokenizer(
 
476
                            atts.getValue(i)
 
477
                        );
 
478
                        bondARef1.add((String)st.nextElement());
 
479
                        bondARef2.add((String)st.nextElement());
 
480
                    } catch (Exception e) {
 
481
                        logger.error("Error in CML file: ", e.getMessage());
 
482
                        logger.debug(e);
 
483
                    }
 
484
                } else if (att.equals("order")) { // this is CML 2.0 support
 
485
                    order.add(atts.getValue(i).trim());
 
486
                } else if (att.equals("dictRef")) {
 
487
                    bondDictRefs.add(atts.getValue(i).trim());
 
488
                }
 
489
            }
 
490
            
 
491
            stereoGiven = false;
 
492
            curRef = 0;
 
493
        } else if ("bondArray".equals(name)) {
 
494
            boolean bondsCounted = false;
 
495
            for (int i = 0; i < atts.getLength(); i++) {
 
496
                String att = atts.getQName(i);
 
497
                int count = 0;
 
498
                if (att.equals("bondID")) {
 
499
                    count = addArrayElementsTo(bondid, atts.getValue(i));
 
500
                } else if (att.equals("atomRefs1")) {
 
501
                    count = addArrayElementsTo(bondARef1, atts.getValue(i));
 
502
                } else if (att.equals("atomRefs2")) {
 
503
                    count = addArrayElementsTo(bondARef2, atts.getValue(i));
 
504
                } else if (att.equals("atomRef1")) {
 
505
                    count = addArrayElementsTo(bondARef1, atts.getValue(i));
 
506
                } else if (att.equals("atomRef2")) {
 
507
                    count = addArrayElementsTo(bondARef2, atts.getValue(i));
 
508
                } else if (att.equals("order")) {
 
509
                    count = addArrayElementsTo(order, atts.getValue(i));
 
510
                } else {
 
511
                    logger.warn("Unparsed attribute: " + att);
 
512
                }
 
513
                if (!bondsCounted) {
 
514
                    bondCounter += count;
 
515
                    bondsCounted = true;
 
516
                }
 
517
            }
 
518
            curRef = 0;
 
519
        } else if ("bondStereo".equals(name)) {
 
520
            for (int i = 0; i < atts.getLength(); i++) {
 
521
                if (atts.getQName(i).equals("dictRef")) {
 
522
                        if (atts.getValue(i).startsWith("cml:"))
 
523
                        bondStereo.add(atts.getValue(i).substring(4));
 
524
                    stereoGiven=true;
 
525
                }
 
526
            }
 
527
        } else if ("bondType".equals(name)) {
 
528
            for (int i = 0; i < atts.getLength(); i++) {
 
529
                if (atts.getQName(i).equals("dictRef")) {
 
530
                        if (atts.getValue(i).equals("cdk:aromaticBond"))
 
531
                                bondAromaticity.add(Boolean.TRUE);
 
532
                }
 
533
            }
 
534
        } else if ("molecule".equals(name)) {
 
535
            newMolecule();
 
536
            BUILTIN = "";
 
537
//            cdo.startObject("Molecule");
 
538
            if (currentChemModel == null) currentChemModel = currentChemFile.getBuilder().newChemModel();
 
539
            if (currentMoleculeSet == null) currentMoleculeSet = currentChemFile.getBuilder().newMoleculeSet();
 
540
            currentMolecule = currentChemFile.getBuilder().newMolecule();
 
541
            for (int i = 0; i < atts.getLength(); i++) {
 
542
                if (atts.getQName(i).equals("id")) {
 
543
//                    cdo.setObjectProperty("Molecule", "id", atts.getValue(i));
 
544
                        currentMolecule.setID(atts.getValue(i));
 
545
                } else if (atts.getQName(i).equals("dictRef")) {
 
546
//                      cdo.setObjectProperty("Molecule", "dictRef", atts.getValue(i));
 
547
                        currentMolecule.setProperty(new DictRef(DICTREF, atts.getValue(i)), atts.getValue(i));
 
548
                }
 
549
            }
 
550
        } else if ("crystal".equals(name)) {
 
551
            newCrystalData();
 
552
//            cdo.startObject("Crystal");
 
553
            currentMolecule = currentChemFile.getBuilder().newCrystal(currentMolecule);
 
554
            for (int i = 0; i < atts.getLength(); i++) {
 
555
                String att = atts.getQName(i);
 
556
                if (att.equals("z")) {
 
557
//                    cdo.setObjectProperty("Crystal", "z", atts.getValue(i));
 
558
                        ((ICrystal)currentMolecule).setZ(Integer.parseInt(atts.getValue(i)));
 
559
                }
 
560
            }
 
561
        } else if ("symmetry".equals(name)) {
 
562
            for (int i = 0; i < atts.getLength(); i++) {
 
563
                String att = atts.getQName(i);
 
564
                if (att.equals("spaceGroup")) {
 
565
//                    cdo.setObjectProperty("Crystal", "spacegroup", atts.getValue(i));
 
566
                        ((ICrystal)currentMolecule).setSpaceGroup(atts.getValue(i));
 
567
                }
 
568
            }
 
569
        } else if ("identifier".equals(name)) {
 
570
                if (atts.getValue("convention") != null && 
 
571
                        atts.getValue("convention").equals("iupac:inchi") &&
 
572
                        atts.getValue("value") != null) {
 
573
//                cdo.setObjectProperty("Molecule", "inchi", atts.getValue("value"));
 
574
                        currentMolecule.setProperty(CDKConstants.INCHI, atts.getValue("value"));
 
575
            }
 
576
        } else if ("scalar".equals(name)) {
 
577
            if (xpath.endsWith("crystal", "scalar"))
 
578
                crystalScalar++;
 
579
        } else if ("label".equals(name)) {
 
580
            if (xpath.endsWith("atomType", "label")) {
 
581
//              cdo.setObjectProperty("Atom", "atomTypeLabel", atts.getValue("value"));
 
582
                currentAtom.setAtomTypeName(atts.getValue("value"));
 
583
            }
 
584
        } else if ("list".equals(name)) {
 
585
//            cdo.startObject("MoleculeSet");
 
586
                if (DICTREF.equals("cdk:model")) {
 
587
                        currentChemModel = currentChemFile.getBuilder().newChemModel();
 
588
                } else if (DICTREF.equals("cdk:moleculeSet")) {
 
589
                        currentMoleculeSet = currentChemFile.getBuilder().newMoleculeSet();
 
590
                        currentMolecule = currentChemFile.getBuilder().newMolecule();
 
591
                } else {
 
592
                        // the old default
 
593
                        currentMoleculeSet = currentChemFile.getBuilder().newMoleculeSet();
 
594
                        currentMolecule = currentChemFile.getBuilder().newMolecule();
 
595
                }
 
596
        }
 
597
    }
 
598
 
 
599
    public void endElement(CMLStack xpath, String uri, String name, String raw) {
 
600
        logger.debug("EndElement: ", name);
 
601
 
 
602
        String cData = currentChars;
 
603
        if ("bond".equals(name)) {
 
604
                if (!stereoGiven)
 
605
                bondStereo.add("");
 
606
            if (bondStereo.size() > bondDictRefs.size())
 
607
                bondDictRefs.add(null);
 
608
            if (bondAromaticity.size() > bondDictRefs.size())
 
609
                bondAromaticity.add(null);
 
610
        } else if ("atom".equals(name)) {
 
611
            if (atomCounter > eltitles.size()) {
 
612
                eltitles.add(null);
 
613
            }
 
614
            if (atomCounter > hCounts.size()) {
 
615
                /* while strictly undefined, assume zero 
 
616
                implicit hydrogens when no number is given */
 
617
                hCounts.add("0");
 
618
            }
 
619
            if (atomCounter > atomDictRefs.size()) {
 
620
                atomDictRefs.add(null);
 
621
            }
 
622
            if (atomCounter > isotope.size()) {
 
623
                isotope.add(null);
 
624
            }
 
625
            if (atomCounter > spinMultiplicities.size()) {
 
626
                spinMultiplicities.add(null);
 
627
            }
 
628
            if (atomCounter > occupancies.size()) {
 
629
                occupancies.add(null);
 
630
            }
 
631
            if (atomCounter > formalCharges.size()) {
 
632
                /* while strictly undefined, assume zero 
 
633
                implicit hydrogens when no number is given */
 
634
                formalCharges.add("0");
 
635
            }
 
636
            /* It may happen that not all atoms have
 
637
            associated 2D or 3D coordinates. accept that */
 
638
            if (atomCounter > x2.size() && x2.size() != 0) {
 
639
                /* apparently, the previous atoms had atomic
 
640
                coordinates, add 'null' for this atom */
 
641
                x2.add(null);
 
642
                y2.add(null);
 
643
            }
 
644
            if (atomCounter > x3.size() && x3.size() != 0) {
 
645
                /* apparently, the previous atoms had atomic
 
646
                coordinates, add 'null' for this atom */
 
647
                x3.add(null);
 
648
                y3.add(null);
 
649
                z3.add(null);
 
650
            }
 
651
            
 
652
            if (atomCounter > xfract.size() && xfract.size() != 0) {
 
653
                /* apparently, the previous atoms had atomic
 
654
                coordinates, add 'null' for this atom */
 
655
                xfract.add(null);
 
656
                yfract.add(null);
 
657
                zfract.add(null);
 
658
            }
 
659
        } else if ("molecule".equals(name)) {
 
660
            storeData();
 
661
//            cdo.endObject("Molecule");
 
662
            if (currentMolecule instanceof IMolecule) {
 
663
                logger.debug("Adding molecule to set");
 
664
                currentMoleculeSet.addMolecule((IMolecule)currentMolecule);
 
665
                logger.debug("#mols in set: " + currentMoleculeSet.getMoleculeCount());
 
666
            } else if (currentMolecule instanceof ICrystal) {
 
667
                logger.debug("Adding crystal to chemModel");
 
668
                currentChemModel.setCrystal((ICrystal)currentMolecule);
 
669
                currentChemSequence.addChemModel(currentChemModel);
 
670
            }
 
671
        } else if ("crystal".equals(name)) {
 
672
            if (crystalScalar > 0) {
 
673
                // convert unit cell parameters to cartesians
 
674
                Vector3d[] axes = CrystalGeometryTools.notionalToCartesian(
 
675
                    unitcellparams[0], unitcellparams[1], unitcellparams[2],
 
676
                    unitcellparams[3], unitcellparams[4], unitcellparams[5]
 
677
                );
 
678
                cartesianAxesSet = true;
 
679
//                cdo.startObject("a-axis");
 
680
//                cdo.setObjectProperty("a-axis", "x", new Double(aAxis.x).toString());
 
681
//                cdo.setObjectProperty("a-axis", "y", new Double(aAxis.y).toString());
 
682
//                cdo.setObjectProperty("a-axis", "z", new Double(aAxis.z).toString());
 
683
//                cdo.endObject("a-axis");
 
684
//                cdo.startObject("b-axis");
 
685
//                cdo.setObjectProperty("b-axis", "x", new Double(bAxis.x).toString());
 
686
//                cdo.setObjectProperty("b-axis", "y", new Double(bAxis.y).toString());
 
687
//                cdo.setObjectProperty("b-axis", "z", new Double(bAxis.z).toString());
 
688
//                cdo.endObject("b-axis");
 
689
//                cdo.startObject("c-axis");
 
690
//                cdo.setObjectProperty("c-axis", "x", new Double(cAxis.x).toString());
 
691
//                cdo.setObjectProperty("c-axis", "y", new Double(cAxis.y).toString());
 
692
//                cdo.setObjectProperty("c-axis", "z", new Double(cAxis.z).toString());
 
693
//                cdo.endObject("c-axis");
 
694
                ((ICrystal)currentMolecule).setA(axes[0]);
 
695
                ((ICrystal)currentMolecule).setB(axes[1]);
 
696
                ((ICrystal)currentMolecule).setC(axes[2]);
 
697
            } else {
 
698
                logger.error("Could not find crystal unit cell parameters");
 
699
            }
 
700
//            cdo.endObject("Crystal");
 
701
        } else if ("list".equals(name)) {
 
702
//            cdo.endObject("MoleculeSet");
 
703
                // FIXME: I really should check the DICTREF, but there is currently
 
704
                // no mechanism for storing these for use with endTag() :(
 
705
                // So, instead, for now, just see if it already has done the setting
 
706
                // to work around duplication
 
707
                if (currentChemModel.getMoleculeSet() != currentMoleculeSet) {
 
708
                        currentChemModel.setMoleculeSet(currentMoleculeSet);
 
709
                        currentChemSequence.addChemModel(currentChemModel);
 
710
                }
 
711
        } else if ("coordinate3".equals(name)) {
 
712
            if (BUILTIN.equals("xyz3")) {
 
713
                logger.debug("New coord3 xyz3 found: ", currentChars);
 
714
                
 
715
                try {
 
716
                    
 
717
                    StringTokenizer st = new StringTokenizer(currentChars);
 
718
                    x3.add(st.nextToken());
 
719
                    y3.add(st.nextToken());
 
720
                    z3.add(st.nextToken());
 
721
                    logger.debug("coord3 x3.length: ", x3.size());
 
722
                    logger.debug("coord3 y3.length: ", y3.size());
 
723
                    logger.debug("coord3 z3.length: ", z3.size());
 
724
                } catch (Exception exception) {
 
725
                    logger.error(
 
726
                    "CMLParsing error while setting coordinate3!");
 
727
                    logger.debug(exception);
 
728
                }
 
729
            } else {
 
730
                logger.warn("Unknown coordinate3 BUILTIN: " + BUILTIN);
 
731
            }
 
732
        } else if ("string".equals(name)) {
 
733
            if (BUILTIN.equals("elementType")) {
 
734
                logger.debug("Element: ", cData.trim());
 
735
                elsym.add(cData);
 
736
            } else if (BUILTIN.equals("atomRef")) {
 
737
                curRef++;
 
738
                logger.debug("Bond: ref #", curRef);
 
739
                
 
740
                if (curRef == 1) {
 
741
                    bondARef1.add(cData.trim());
 
742
                } else if (curRef == 2) {
 
743
                    bondARef2.add(cData.trim());
 
744
                }
 
745
            } else if (BUILTIN.equals("order")) {
 
746
                logger.debug("Bond: order ", cData.trim());
 
747
                order.add(cData.trim());
 
748
            } else if (BUILTIN.equals("formalCharge")) {
 
749
                // NOTE: this combination is in violation of the CML DTD!!!
 
750
                logger.warn("formalCharge BUILTIN accepted but violating CML DTD");
 
751
                logger.debug("Charge: ", cData.trim());
 
752
                String charge = cData.trim();
 
753
                if (charge.startsWith("+") && charge.length() > 1) {
 
754
                    charge = charge.substring(1);
 
755
                }
 
756
                formalCharges.add(charge);
 
757
            }
 
758
        } else if ("float".equals(name)) {
 
759
            if (BUILTIN.equals("x3")) {
 
760
                x3.add(cData.trim());
 
761
            } else if (BUILTIN.equals("y3")) {
 
762
                y3.add(cData.trim());
 
763
            } else if (BUILTIN.equals("z3")) {
 
764
                z3.add(cData.trim());
 
765
            } else if (BUILTIN.equals("x2")) {
 
766
                x2.add(cData.trim());
 
767
            } else if (BUILTIN.equals("y2")) {
 
768
                y2.add(cData.trim());
 
769
            } else if (BUILTIN.equals("order")) {
 
770
                // NOTE: this combination is in violation of the CML DTD!!!
 
771
                order.add(cData.trim());
 
772
            } else if (BUILTIN.equals("charge") || BUILTIN.equals("partialCharge")) {
 
773
                partialCharges.add(cData.trim());
 
774
            }
 
775
        } else if ("integer".equals(name)) {
 
776
            if (BUILTIN.equals("formalCharge")) {
 
777
                formalCharges.add(cData.trim());
 
778
            }
 
779
        } else if ("coordinate2".equals(name)) {
 
780
            if (BUILTIN.equals("xy2")) {
 
781
                logger.debug("New coord2 xy2 found.", cData);
 
782
                
 
783
                try {
 
784
                    
 
785
                    StringTokenizer st = new StringTokenizer(cData);
 
786
                    x2.add(st.nextToken());
 
787
                    y2.add(st.nextToken());
 
788
                } catch (Exception e) {
 
789
                    notify("CMLParsing error: " + e, SYSTEMID, 175, 1);
 
790
                }
 
791
            }
 
792
        } else if ("stringArray".equals(name)) {
 
793
            if (BUILTIN.equals("id") || BUILTIN.equals("atomId")
 
794
                || BUILTIN.equals("atomID")) { // invalid according to CML1 DTD but found in OpenBabel 1.x output
 
795
                
 
796
                try {
 
797
                    boolean countAtoms = (atomCounter == 0) ? true : false;
 
798
                    StringTokenizer st = new StringTokenizer(cData);
 
799
                    
 
800
                    while (st.hasMoreTokens()) {
 
801
                        if (countAtoms) { atomCounter++; }
 
802
                        String token = st.nextToken();
 
803
                        logger.debug("StringArray (Token): ", token);
 
804
                        elid.add(token);
 
805
                    }
 
806
                } catch (Exception e) {
 
807
                    notify("CMLParsing error: " + e, SYSTEMID, 186, 1);
 
808
                }
 
809
            } else if (BUILTIN.equals("elementType")) {
 
810
                
 
811
                try {
 
812
                    boolean countAtoms = (atomCounter == 0) ? true : false;
 
813
                    StringTokenizer st = new StringTokenizer(cData);
 
814
                    
 
815
                    while (st.hasMoreTokens()) {
 
816
                        if (countAtoms) { atomCounter++; }
 
817
                        elsym.add(st.nextToken());
 
818
                    }
 
819
                } catch (Exception e) {
 
820
                    notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
 
821
                }
 
822
            } else if (BUILTIN.equals("atomRefs")) {
 
823
                curRef++;
 
824
                logger.debug("New atomRefs found: ", curRef);
 
825
                
 
826
                try {
 
827
                    boolean countBonds = (bondCounter == 0) ? true : false;
 
828
                    StringTokenizer st = new StringTokenizer(cData);
 
829
                    
 
830
                    while (st.hasMoreTokens()) {
 
831
                        if (countBonds) { bondCounter++; }
 
832
                        String token = st.nextToken();
 
833
                        logger.debug("Token: ", token);
 
834
                        
 
835
                        if (curRef == 1) {
 
836
                            bondARef1.add(token);
 
837
                        } else if (curRef == 2) {
 
838
                            bondARef2.add(token);
 
839
                        }
 
840
                    }
 
841
                } catch (Exception e) {
 
842
                    notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
 
843
                }
 
844
            } else if (BUILTIN.equals("atomRef")) {
 
845
                curRef++;
 
846
                logger.debug("New atomRef found: ", curRef); // this is CML1 stuff, we get things like:
 
847
                /*
 
848
                  <bondArray>
 
849
                  <stringArray builtin="atomRef">a2 a2 a2 a2 a3 a3 a4 a4 a5 a6 a7 a9</stringArray>
 
850
                  <stringArray builtin="atomRef">a9 a11 a12 a13 a5 a4 a6 a9 a7 a8 a8 a10</stringArray>
 
851
                  <stringArray builtin="order">1 1 1 1 2 1 2 1 1 1 2 2</stringArray>
 
852
                  </bondArray>
 
853
                */
 
854
                
 
855
                try {
 
856
                    boolean countBonds = (bondCounter == 0) ? true : false;
 
857
                    StringTokenizer st = new StringTokenizer(cData);
 
858
                    
 
859
                    while (st.hasMoreTokens()) {
 
860
                        if (countBonds) { bondCounter++; }
 
861
                        String token = st.nextToken();
 
862
                        logger.debug("Token: ", token);
 
863
                        
 
864
                        if (curRef == 1) {
 
865
                            bondARef1.add(token);
 
866
                        } else if (curRef == 2) {
 
867
                            bondARef2.add(token);
 
868
                        }
 
869
                    }
 
870
                } catch (Exception e) {
 
871
                    notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
 
872
                }
 
873
            } else if (BUILTIN.equals("order")) {
 
874
                logger.debug("New bond order found.");
 
875
                
 
876
                try {
 
877
                    
 
878
                    StringTokenizer st = new StringTokenizer(cData);
 
879
                    
 
880
                    while (st.hasMoreTokens()) {
 
881
                        
 
882
                        String token = st.nextToken();
 
883
                        logger.debug("Token: ", token);
 
884
                        order.add(token);
 
885
                    }
 
886
                } catch (Exception e) {
 
887
                    notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
 
888
                }
 
889
            }
 
890
        } else if ("integerArray".equals(name)) {
 
891
            logger.debug("IntegerArray: builtin = ", BUILTIN);
 
892
            
 
893
            if (BUILTIN.equals("formalCharge")) {
 
894
                
 
895
                try {
 
896
                    
 
897
                    StringTokenizer st = new StringTokenizer(cData);
 
898
                    
 
899
                    while (st.hasMoreTokens()) {
 
900
                        
 
901
                        String token = st.nextToken();
 
902
                        logger.debug("Charge added: ", token);
 
903
                        formalCharges.add(token);
 
904
                    }
 
905
                } catch (Exception e) {
 
906
                    notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
 
907
                }
 
908
            }
 
909
        } else if ("scalar".equals(name)) {
 
910
            if (xpath.endsWith("crystal", "scalar")) {
 
911
                logger.debug("Going to set a crystal parameter: " + crystalScalar, 
 
912
                    " to ", cData);
 
913
                try {
 
914
                    unitcellparams[crystalScalar-1] = Double.parseDouble(cData.trim());
 
915
                } catch (NumberFormatException exception) {
 
916
                    logger.error("Content must a float: " + cData);
 
917
                }
 
918
            } else if (xpath.endsWith("bond", "scalar")) {
 
919
                if (DICTREF.equals("mdl:stereo")) {
 
920
                        bondStereo.add(cData.trim());
 
921
                    stereoGiven=true;
 
922
                }
 
923
            } else if (xpath.endsWith("atom", "scalar")) {
 
924
                if (DICTREF.equals("cdk:partialCharge")) {
 
925
                    partialCharges.add(cData.trim());
 
926
                }
 
927
            } else if (xpath.endsWith("molecule", "scalar")) {
 
928
                if (DICTREF.equals("pdb:id")) {
 
929
//                      cdo.setObjectProperty("Molecule", DICTREF, cData);
 
930
                        currentMolecule.setProperty(new DictRef(DICTREF, cData), cData);
 
931
                } else if (DICTREF.equals("cdk:molecularProperty")) {
 
932
                        currentMolecule.setProperty(elementTitle, cData);
 
933
                }
 
934
            } else {
 
935
                logger.warn("Ignoring scalar: " + xpath);
 
936
            }
 
937
        } else if ("floatArray".equals(name)) {
 
938
            if (BUILTIN.equals("x3")) {
 
939
                
 
940
                try {
 
941
                    
 
942
                    StringTokenizer st = new StringTokenizer(cData);
 
943
                    
 
944
                    while (st.hasMoreTokens())
 
945
                        x3.add(st.nextToken());
 
946
                } catch (Exception e) {
 
947
                    notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
 
948
                }
 
949
            } else if (BUILTIN.equals("y3")) {
 
950
                
 
951
                try {
 
952
                    
 
953
                    StringTokenizer st = new StringTokenizer(cData);
 
954
                    
 
955
                    while (st.hasMoreTokens())
 
956
                        y3.add(st.nextToken());
 
957
                } catch (Exception e) {
 
958
                    notify("CMLParsing error: " + e, SYSTEMID, 213, 1);
 
959
                }
 
960
            } else if (BUILTIN.equals("z3")) {
 
961
                
 
962
                try {
 
963
                    
 
964
                    StringTokenizer st = new StringTokenizer(cData);
 
965
                    
 
966
                    while (st.hasMoreTokens())
 
967
                        z3.add(st.nextToken());
 
968
                } catch (Exception e) {
 
969
                    notify("CMLParsing error: " + e, SYSTEMID, 221, 1);
 
970
                }
 
971
            } else if (BUILTIN.equals("x2")) {
 
972
                logger.debug("New floatArray found.");
 
973
                
 
974
                try {
 
975
                    
 
976
                    StringTokenizer st = new StringTokenizer(cData);
 
977
                    
 
978
                    while (st.hasMoreTokens())
 
979
                        x2.add(st.nextToken());
 
980
                } catch (Exception e) {
 
981
                    notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
 
982
                }
 
983
            } else if (BUILTIN.equals("y2")) {
 
984
                logger.debug("New floatArray found.");
 
985
                
 
986
                try {
 
987
                    
 
988
                    StringTokenizer st = new StringTokenizer(cData);
 
989
                    
 
990
                    while (st.hasMoreTokens())
 
991
                        y2.add(st.nextToken());
 
992
                } catch (Exception e) {
 
993
                    notify("CMLParsing error: " + e, SYSTEMID, 454, 1);
 
994
                }
 
995
            } else if (BUILTIN.equals("partialCharge")) {
 
996
                logger.debug("New floatArray with partial charges found.");
 
997
                
 
998
                try {
 
999
                    
 
1000
                    StringTokenizer st = new StringTokenizer(cData);
 
1001
                    
 
1002
                    while (st.hasMoreTokens())
 
1003
                        partialCharges.add(st.nextToken());
 
1004
                } catch (Exception e) {
 
1005
                    notify("CMLParsing error: " + e, SYSTEMID, 462, 1);
 
1006
                }
 
1007
            }
 
1008
        } else if ("basic".equals(name)) {
 
1009
            // assuming this is the child element of <identifier>
 
1010
            this.inchi = cData;
 
1011
        } else if ("name".equals(name)) {
 
1012
            if (xpath.endsWith("molecule", "name")) {
 
1013
                if (DICTREF.length() > 0) {
 
1014
//                      cdo.setObjectProperty("Molecule", DICTREF, cData);
 
1015
                        
 
1016
                        currentMolecule.setProperty(new DictRef(DICTREF, cData), cData);
 
1017
                } else {
 
1018
//                      cdo.setObjectProperty("Molecule", "Name", cData);
 
1019
                        currentMolecule.setProperty(CDKConstants.TITLE, cData);
 
1020
                }
 
1021
            }
 
1022
        } else {
 
1023
            logger.warn("Skipping element: " + name);
 
1024
        }
 
1025
 
 
1026
        currentChars = "";
 
1027
        BUILTIN = "";
 
1028
        elementTitle = "";
 
1029
    }
 
1030
 
 
1031
    public void characterData(CMLStack xpath, char[] ch, int start, int length) {
 
1032
        currentChars = currentChars + new String(ch, start, length);
 
1033
        logger.debug("CD: ", currentChars);
 
1034
    }
 
1035
 
 
1036
    protected void notify(String message, String systemId, int line, 
 
1037
                          int column) {
 
1038
        logger.debug("Message: ", message);
 
1039
        logger.debug("SystemId: ", systemId);
 
1040
        logger.debug("Line: ", line);
 
1041
        logger.debug("Column: ", column);
 
1042
    }
 
1043
 
 
1044
    protected void storeData() {
 
1045
        if (inchi != null) {
 
1046
//            cdo.setObjectProperty("Molecule", "inchi", inchi);
 
1047
                currentMolecule.setProperty(CDKConstants.INCHI, inchi);
 
1048
        }
 
1049
        storeAtomData();
 
1050
        storeBondData();
 
1051
    }
 
1052
 
 
1053
    protected void storeAtomData() {
 
1054
        logger.debug("No atoms: ", atomCounter);
 
1055
        if (atomCounter == 0) {
 
1056
            return;
 
1057
        }
 
1058
 
 
1059
        boolean hasID = false;
 
1060
        boolean has3D = false;
 
1061
        boolean has3Dfract = false;
 
1062
        boolean has2D = false;
 
1063
        boolean hasFormalCharge = false;
 
1064
        boolean hasPartialCharge = false;
 
1065
        boolean hasHCounts = false;
 
1066
        boolean hasSymbols = false;
 
1067
        boolean hasTitles = false;
 
1068
        boolean hasIsotopes = false;
 
1069
        boolean hasDictRefs = false;
 
1070
        boolean hasSpinMultiplicities = false;
 
1071
        boolean hasOccupancies = false;
 
1072
 
 
1073
        if (elid.size() == atomCounter) {
 
1074
            hasID = true;
 
1075
        } else {
 
1076
            logger.debug("No atom ids: " + elid.size(), " != " + atomCounter);
 
1077
        }
 
1078
 
 
1079
        if (elsym.size() == atomCounter) {
 
1080
            hasSymbols = true;
 
1081
        } else {
 
1082
            logger.debug(
 
1083
                    "No atom symbols: " + elsym.size(), " != " + atomCounter);
 
1084
        }
 
1085
 
 
1086
        if (eltitles.size() == atomCounter) {
 
1087
            hasTitles = true;
 
1088
        } else {
 
1089
            logger.debug(
 
1090
                    "No atom titles: " + eltitles.size(), " != " + atomCounter);
 
1091
        }
 
1092
 
 
1093
        if ((x3.size() == atomCounter) && (y3.size() == atomCounter) && 
 
1094
            (z3.size() == atomCounter)) {
 
1095
            has3D = true;
 
1096
        } else {
 
1097
            logger.debug(
 
1098
                    "No 3D info: " + x3.size(), " " + y3.size(), " " + 
 
1099
                    z3.size(), " != " + atomCounter);
 
1100
        }
 
1101
 
 
1102
        if ((xfract.size() == atomCounter) && (yfract.size() == atomCounter) && 
 
1103
            (zfract.size() == atomCounter)) {
 
1104
            has3Dfract = true;
 
1105
        } else {
 
1106
            logger.debug(
 
1107
                    "No 3D fractional info: " + xfract.size(), " " + yfract.size(), " " + 
 
1108
                    zfract.size(), " != " + atomCounter);
 
1109
        }
 
1110
 
 
1111
        if ((x2.size() == atomCounter) && (y2.size() == atomCounter)) {
 
1112
            has2D = true;
 
1113
        } else {
 
1114
            logger.debug(
 
1115
                    "No 2D info: " + x2.size(), " " + y2.size(), " != " + 
 
1116
                    atomCounter);
 
1117
        }
 
1118
 
 
1119
        if (formalCharges.size() == atomCounter) {
 
1120
            hasFormalCharge = true;
 
1121
        } else {
 
1122
            logger.debug(
 
1123
                    "No formal Charge info: " + formalCharges.size(), 
 
1124
                    " != " + atomCounter);
 
1125
        }
 
1126
 
 
1127
        if (partialCharges.size() == atomCounter) {
 
1128
            hasPartialCharge = true;
 
1129
        } else {
 
1130
            logger.debug(
 
1131
                    "No partial Charge info: " + partialCharges.size(),
 
1132
                    " != " + atomCounter);
 
1133
        }
 
1134
 
 
1135
        if (hCounts.size() == atomCounter) {
 
1136
            hasHCounts = true;
 
1137
        } else {
 
1138
            logger.debug(
 
1139
                    "No hydrogen Count info: " + hCounts.size(), 
 
1140
                    " != " + atomCounter);
 
1141
        }
 
1142
 
 
1143
        if (spinMultiplicities.size() == atomCounter) {
 
1144
            hasSpinMultiplicities = true;
 
1145
        } else {
 
1146
            logger.debug(
 
1147
                    "No spinMultiplicity info: " + spinMultiplicities.size(),
 
1148
                    " != " + atomCounter);
 
1149
        }
 
1150
 
 
1151
        if (occupancies.size() == atomCounter) {
 
1152
            hasOccupancies = true;
 
1153
        } else {
 
1154
            logger.debug(
 
1155
                    "No occupancy info: " + occupancies.size(),
 
1156
                    " != " + atomCounter);
 
1157
        }
 
1158
 
 
1159
        if (atomDictRefs.size() == atomCounter) {
 
1160
            hasDictRefs = true;
 
1161
        } else {
 
1162
            logger.debug(
 
1163
                    "No dictRef info: " + atomDictRefs.size(),
 
1164
                    " != " + atomCounter);
 
1165
        }
 
1166
 
 
1167
        if (isotope.size() == atomCounter) {
 
1168
            hasIsotopes = true;
 
1169
        } else {
 
1170
            logger.debug(
 
1171
                    "No isotope info: " + isotope.size(),
 
1172
                    " != " + atomCounter);
 
1173
        }
 
1174
 
 
1175
        for (int i = 0; i < atomCounter; i++) {
 
1176
            logger.info("Storing atom: ", i);
 
1177
//            cdo.startObject("Atom");
 
1178
            currentAtom = currentChemFile.getBuilder().newAtom("H");
 
1179
            logger.debug("Atom # " + atomCounter);
 
1180
            if (hasID) {
 
1181
//                cdo.setObjectProperty("Atom", "id", (String)elid.get(i));
 
1182
                logger.debug("id: ", (String)elid.get(i));
 
1183
                currentAtom.setID((String)elid.get(i));
 
1184
                atomEnumeration.put((String)elid.get(i), currentAtom);
 
1185
            }
 
1186
            if (hasTitles) {
 
1187
                if (hasSymbols) {
 
1188
                    String symbol = (String)elsym.get(i);
 
1189
                    if (symbol.equals("Du") || symbol.equals("Dummy")) {
 
1190
//                        cdo.setObjectProperty("PseudoAtom", "label", (String)eltitles.get(i));
 
1191
                        if (!(currentAtom instanceof IPseudoAtom)) {
 
1192
                            currentAtom = currentChemFile.getBuilder().newPseudoAtom(currentAtom);
 
1193
                            if (hasID)
 
1194
                                atomEnumeration.put((String)elid.get(i), currentAtom);
 
1195
                        }
 
1196
                        ((IPseudoAtom)currentAtom).setLabel((String)eltitles.get(i));
 
1197
                    } else {
 
1198
//                        cdo.setObjectProperty("Atom", "title", (String)eltitles.get(i));
 
1199
                        // FIXME: huh?
 
1200
                        if (eltitles.get(i) != null)
 
1201
                                currentAtom.setProperty(CDKConstants.TITLE, (String)eltitles.get(i));
 
1202
                    }
 
1203
                } else {
 
1204
//                    cdo.setObjectProperty("Atom", "title", (String)eltitles.get(i));
 
1205
                        // FIXME: huh?
 
1206
                        if (eltitles.get(i) != null)
 
1207
                                currentAtom.setProperty(CDKConstants.TITLE, (String)eltitles.get(i));
 
1208
                }
 
1209
            }
 
1210
 
 
1211
            // store optional atom properties
 
1212
            if (hasSymbols) {
 
1213
                String symbol = (String)elsym.get(i);
 
1214
                if (symbol.equals("Du") || symbol.equals("Dummy")) {
 
1215
                    symbol = "R";
 
1216
                }
 
1217
//                cdo.setObjectProperty("Atom", "type", symbol);
 
1218
                if (symbol.equals("R") && !(currentAtom instanceof IPseudoAtom)) {
 
1219
                    currentAtom = currentChemFile.getBuilder().newPseudoAtom(currentAtom);
 
1220
                    if (hasID)
 
1221
                        atomEnumeration.put((String)elid.get(i), currentAtom);
 
1222
                }
 
1223
                currentAtom.setSymbol(symbol);
 
1224
                try{
 
1225
                        IsotopeFactory.getInstance(currentAtom.getBuilder()).configure(currentAtom);
 
1226
                }catch(Exception ex){
 
1227
                        logger.warn("Could not configure atom");
 
1228
                }
 
1229
            }
 
1230
 
 
1231
            if (has3D) {
 
1232
//                cdo.setObjectProperty("Atom", "x3", (String)x3.get(i));
 
1233
//                cdo.setObjectProperty("Atom", "y3", (String)y3.get(i));
 
1234
//                cdo.setObjectProperty("Atom", "z3", (String)z3.get(i));
 
1235
                if (x3.get(i) != null &&
 
1236
                        y3.get(i) != null &&
 
1237
                        z3.get(i) != null) {
 
1238
                        currentAtom.setPoint3d(
 
1239
                                        new Point3d(
 
1240
                                                Double.parseDouble((String)x3.get(i)),
 
1241
                                                Double.parseDouble((String)y3.get(i)),
 
1242
                                        Double.parseDouble((String)z3.get(i))
 
1243
                                )
 
1244
                        );
 
1245
                }
 
1246
            }
 
1247
 
 
1248
            if (has3Dfract) {
 
1249
                // ok, need to convert fractional into eucledian coordinates
 
1250
//                cdo.setObjectProperty("Atom", "xFract", (String)xfract.get(i));
 
1251
//                cdo.setObjectProperty("Atom", "yFract", (String)yfract.get(i));
 
1252
//                cdo.setObjectProperty("Atom", "zFract", (String)zfract.get(i));
 
1253
                currentAtom.setFractionalPoint3d(
 
1254
                        new Point3d(
 
1255
                                Double.parseDouble((String)xfract.get(i)),
 
1256
                                Double.parseDouble((String)yfract.get(i)),
 
1257
                                Double.parseDouble((String)zfract.get(i))
 
1258
                        )
 
1259
                );
 
1260
            }
 
1261
 
 
1262
            if (hasFormalCharge) {
 
1263
//                cdo.setObjectProperty("Atom", "formalCharge", 
 
1264
//                                      (String)formalCharges.get(i));
 
1265
                currentAtom.setFormalCharge(Integer.parseInt((String)formalCharges.get(i)));
 
1266
            }
 
1267
 
 
1268
            if (hasPartialCharge) {
 
1269
                logger.debug("Storing partial atomic charge...");
 
1270
//                cdo.setObjectProperty("Atom", "partialCharge", 
 
1271
//                                      (String)partialCharges.get(i));
 
1272
                currentAtom.setCharge(Double.parseDouble((String)partialCharges.get(i)));
 
1273
            }
 
1274
 
 
1275
            if (hasHCounts) {
 
1276
//                cdo.setObjectProperty("Atom", "hydrogenCount", (String)hCounts.get(i));
 
1277
                // FIXME: the hCount in CML is the total of implicit *and* explicit
 
1278
                currentAtom.setHydrogenCount(Integer.parseInt((String)hCounts.get(i)));
 
1279
            }
 
1280
 
 
1281
            if (has2D) {
 
1282
                if (x2.get(i) != null && y2.get(i) != null) {
 
1283
//                    cdo.setObjectProperty("Atom", "x2", (String)x2.get(i));
 
1284
//                    cdo.setObjectProperty("Atom", "y2", (String)y2.get(i));
 
1285
                        currentAtom.setPoint2d(
 
1286
                                new Point2d(
 
1287
                                        Double.parseDouble((String)x2.get(i)),
 
1288
                                        Double.parseDouble((String)y2.get(i))
 
1289
                                )
 
1290
                        );
 
1291
                }
 
1292
            }
 
1293
            
 
1294
            if (hasDictRefs) {
 
1295
//                cdo.setObjectProperty("Atom", "dictRef", (String)atomDictRefs.get(i));
 
1296
                if (atomDictRefs.get(i) != null)
 
1297
                        currentAtom.setProperty("org.openscience.cdk.dict", (String)atomDictRefs.get(i));
 
1298
            }
 
1299
 
 
1300
            if (hasSpinMultiplicities && spinMultiplicities.get(i) != null) {
 
1301
//                cdo.setObjectProperty("Atom", "spinMultiplicity", (String)spinMultiplicities.get(i));
 
1302
                int unpairedElectrons = Integer.parseInt((String)spinMultiplicities.get(i))-1;
 
1303
                for (int sm=0; sm<unpairedElectrons; sm++) {
 
1304
                    currentMolecule.addSingleElectron(currentChemFile.getBuilder().newSingleElectron(currentAtom));
 
1305
                }
 
1306
            }
 
1307
 
 
1308
            if (hasOccupancies && occupancies.get(i) != null) {
 
1309
//                cdo.setObjectProperty("Atom", "occupanciy", (String)occupancies.get(i));
 
1310
                // FIXME: this has no ChemFileCDO equivalent, not even if spelled correctly
 
1311
            }
 
1312
 
 
1313
            if (hasIsotopes) {
 
1314
//                cdo.setObjectProperty("Atom", "massNumber", (String)isotope.get(i));
 
1315
                if (isotope.get(i) != null)
 
1316
                        currentAtom.setMassNumber((int)Double.parseDouble((String)isotope.get(i)));
 
1317
            }
 
1318
 
 
1319
//            cdo.endObject("Atom");
 
1320
            currentMolecule.addAtom(currentAtom);
 
1321
        }
 
1322
        if (elid.size() > 0) {
 
1323
            // assume this is the current working list
 
1324
            bondElid = elid;
 
1325
        }
 
1326
        newAtomData();
 
1327
    }
 
1328
    
 
1329
    protected void storeBondData() {
 
1330
        logger.debug(
 
1331
                "Testing a1,a2,stereo,order = count: " + bondARef1.size(), "," + 
 
1332
                bondARef2.size(), "," + bondStereo.size(), "," + order.size(), "=" +
 
1333
                bondCounter);
 
1334
 
 
1335
        if ((bondARef1.size() == bondCounter) && 
 
1336
            (bondARef2.size() == bondCounter)) {
 
1337
            logger.debug("About to add bond info...");
 
1338
 
 
1339
            Iterator orders = order.iterator();
 
1340
            Iterator ids = bondid.iterator();
 
1341
            Iterator bar1s = bondARef1.iterator();
 
1342
            Iterator bar2s = bondARef2.iterator();
 
1343
            Iterator stereos = bondStereo.iterator();
 
1344
            Iterator aroms = bondAromaticity.iterator();
 
1345
 
 
1346
            while (bar1s.hasNext()) {
 
1347
//                cdo.startObject("Bond");
 
1348
//                if (ids.hasNext()) {
 
1349
//                    cdo.setObjectProperty("Bond", "id", (String)ids.next());
 
1350
//                }
 
1351
//                cdo.setObjectProperty("Bond", "atom1", 
 
1352
//                                      new Integer(bondElid.indexOf(
 
1353
//                                                          (String)bar1s.next())).toString());
 
1354
//                cdo.setObjectProperty("Bond", "atom2", 
 
1355
//                                      new Integer(bondElid.indexOf(
 
1356
//                                                          (String)bar2s.next())).toString());
 
1357
                IAtom a1 = (IAtom)atomEnumeration.get((String)bar1s.next());
 
1358
                IAtom a2 = (IAtom)atomEnumeration.get((String)bar2s.next());
 
1359
                currentBond = currentChemFile.getBuilder().newBond(a1, a2);
 
1360
                if (ids.hasNext()) {
 
1361
                        currentBond.setID((String)ids.next());
 
1362
                }
 
1363
 
 
1364
                if (orders.hasNext()) {
 
1365
                    String bondOrder = (String)orders.next();
 
1366
                    
 
1367
                    if ("S".equals(bondOrder)) {
 
1368
//                        cdo.setObjectProperty("Bond", "order", "1");
 
1369
                        currentBond.setOrder(CDKConstants.BONDORDER_SINGLE);
 
1370
                    } else if ("D".equals(bondOrder)) {
 
1371
//                        cdo.setObjectProperty("Bond", "order", "2");
 
1372
                        currentBond.setOrder(CDKConstants.BONDORDER_DOUBLE);
 
1373
                    } else if ("T".equals(bondOrder)) {
 
1374
//                        cdo.setObjectProperty("Bond", "order", "3");
 
1375
                        currentBond.setOrder(CDKConstants.BONDORDER_TRIPLE);
 
1376
                    } else if ("A".equals(bondOrder)) {
 
1377
//                        cdo.setObjectProperty("Bond", "order", "1.5");
 
1378
                        currentBond.setOrder(CDKConstants.BONDORDER_SINGLE);
 
1379
                        currentBond.setFlag(CDKConstants.ISAROMATIC, true);
 
1380
                    } else {
 
1381
//                        cdo.setObjectProperty("Bond", "order", bondOrder);
 
1382
                        currentBond.setOrder(Double.parseDouble(bondOrder));
 
1383
                    }
 
1384
                }
 
1385
 
 
1386
                if (stereos.hasNext()) {
 
1387
//                    cdo.setObjectProperty("Bond", "stereo", 
 
1388
//                                          (String)stereos.next());
 
1389
                        String nextStereo = (String)stereos.next();
 
1390
                    if ("H".equals(nextStereo)) {
 
1391
                        currentBond.setStereo(CDKConstants.STEREO_BOND_DOWN);
 
1392
                    } else if ("W".equals(nextStereo)) {
 
1393
                        currentBond.setStereo(CDKConstants.STEREO_BOND_UP);
 
1394
                    } else if (nextStereo != null){
 
1395
                        logger.warn("Cannot interpret stereo information: " + nextStereo);
 
1396
                    }
 
1397
                }
 
1398
 
 
1399
                if (aroms.hasNext()) {
 
1400
                        Object nextArom = aroms.next();
 
1401
                        if (nextArom != null && nextArom == Boolean.TRUE) {
 
1402
                                currentBond.setFlag(CDKConstants.ISAROMATIC, true);
 
1403
                        }
 
1404
                }
 
1405
 
 
1406
//                cdo.endObject("Bond");
 
1407
                currentMolecule.addBond(currentBond);
 
1408
            }
 
1409
        }
 
1410
        newBondData();
 
1411
    }
 
1412
 
 
1413
    protected int addArrayElementsTo(List toAddto, String array) {
 
1414
        StringTokenizer tokenizer = new StringTokenizer(array);
 
1415
        int i = 0;
 
1416
        while (tokenizer.hasMoreElements()) {
 
1417
            toAddto.add(tokenizer.nextToken());
 
1418
            i++;
 
1419
        }
 
1420
        return i;
 
1421
    }
 
1422
}