~ubuntu-branches/ubuntu/trusty/cdk/trusty-proposed

« back to all changes in this revision

Viewing changes to src/org/openscience/cdk/smiles/SmilesParser.java

  • Committer: Bazaar Package Importer
  • Author(s): Paul Cager
  • Date: 2008-04-09 21:17:53 UTC
  • Revision ID: james.westby@ubuntu.com-20080409211753-46lmjw5z8mx5pd8d
Tags: upstream-1.0.2
Import upstream version 1.0.2

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*  $Revision: 9050 $ $Author: egonw $ $Date: 2007-10-14 20:23:40 +0200 (Sun, 14 Oct 2007) $
 
2
 *
 
3
 *  Copyright (C) 2002-2007  Christoph Steinbeck <steinbeck@users.sf.net>
 
4
 *
 
5
 *  Contact: cdk-devel@lists.sourceforge.net
 
6
 *
 
7
 *  This program is free software; you can redistribute it and/or
 
8
 *  modify it under the terms of the GNU Lesser General Public License
 
9
 *  as published by the Free Software Foundation; either version 2.1
 
10
 *  of the License, or (at your option) any later version.
 
11
 *  All I ask is that proper credit is given for my work, which includes
 
12
 *  - but is not limited to - adding the above copyright notice to the beginning
 
13
 *  of your source code files, and to any copyright notice that you may distribute
 
14
 *  with programs based on this work.
 
15
 *
 
16
 *  This program is distributed in the hope that it will be useful,
 
17
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
18
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
19
 *  GNU Lesser General Public License for more details.
 
20
 *
 
21
 *  You should have received a copy of the GNU Lesser General Public License
 
22
 *  along with this program; if not, write to the Free Software
 
23
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 
24
 */
 
25
package org.openscience.cdk.smiles;
 
26
 
 
27
import java.util.Enumeration;
 
28
import java.util.Stack;
 
29
import java.util.StringTokenizer;
 
30
 
 
31
import org.openscience.cdk.CDKConstants;
 
32
import org.openscience.cdk.DefaultChemObjectBuilder;
 
33
import org.openscience.cdk.aromaticity.HueckelAromaticityDetector;
 
34
import org.openscience.cdk.exception.CDKException;
 
35
import org.openscience.cdk.exception.InvalidSmilesException;
 
36
import org.openscience.cdk.graph.ConnectivityChecker;
 
37
import org.openscience.cdk.interfaces.IAtom;
 
38
import org.openscience.cdk.interfaces.IAtomContainer;
 
39
import org.openscience.cdk.interfaces.IBond;
 
40
import org.openscience.cdk.interfaces.IChemObjectBuilder;
 
41
import org.openscience.cdk.interfaces.IMolecule;
 
42
import org.openscience.cdk.interfaces.IMoleculeSet;
 
43
import org.openscience.cdk.interfaces.IReaction;
 
44
import org.openscience.cdk.tools.HydrogenAdder;
 
45
import org.openscience.cdk.tools.LoggingTool;
 
46
import org.openscience.cdk.tools.ValencyHybridChecker;
 
47
/**
 
48
 * Parses a SMILES {@cdk.cite SMILESTUT} string into an AtomContainer. The full
 
49
 * SSMILES subset {@cdk.cite SSMILESTUT} and the '%' tag for more than 10 rings
 
50
 * at a time are supported. An example:
 
51
 * <pre>
 
52
 * try {
 
53
 *   SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance());
 
54
 *   IMolecule m = sp.parseSmiles("c1ccccc1");
 
55
 * } catch (InvalidSmilesException ise) {
 
56
 * }
 
57
 * </pre>
 
58
 *
 
59
 * <p>This parser does not parse stereochemical information, but the following
 
60
 * features are supported: reaction smiles, partitioned structures, charged
 
61
 * atoms, implicit hydrogen count, '*' and isotope information.
 
62
 *
 
63
 * <p>See {@cdk.cite WEI88} for further information.
 
64
 *
 
65
 * @author         Christoph Steinbeck
 
66
 * @author         Egon Willighagen
 
67
 * @cdk.module     smiles
 
68
 * @cdk.created    2002-04-29
 
69
 * @cdk.keyword    SMILES, parser
 
70
 * @cdk.bug        1274464
 
71
 * @cdk.bug        1363882
 
72
 * @cdk.bug        1503541
 
73
 * @cdk.bug        1535587
 
74
 * @cdk.bug        1541333
 
75
 * @cdk.bug        1579229
 
76
 * @cdk.bug        1579230
 
77
 * @cdk.bug        1579231
 
78
 * @cdk.bug        1579235
 
79
 * @cdk.bug        1579244
 
80
 * 
 
81
 * @see            org.openscience.cdk.smiles.InterruptableSmilesParser
 
82
 */
 
83
public class SmilesParser {
 
84
 
 
85
        private LoggingTool logger;
 
86
        private HydrogenAdder hAdder;
 
87
//      private SmilesValencyChecker valencyChecker;
 
88
        private ValencyHybridChecker valencyChecker;
 
89
                
 
90
        private int status = 0;
 
91
        protected IChemObjectBuilder builder;
 
92
 
 
93
 
 
94
        /**
 
95
         * Constructor for the SmilesParser object.
 
96
         * 
 
97
         * @deprecated Use SmilesParser(IChemObjectBuilder instead)
 
98
         */
 
99
        public SmilesParser()
 
100
        {
 
101
                this(DefaultChemObjectBuilder.getInstance());
 
102
        }
 
103
        
 
104
        /**
 
105
         * Constructor for the SmilesParser object.
 
106
         * 
 
107
         * @param builder IChemObjectBuilder used to create the IMolecules from
 
108
         */
 
109
        public SmilesParser(IChemObjectBuilder builder)
 
110
        {
 
111
                logger = new LoggingTool(this);
 
112
                this.builder = builder;
 
113
                try
 
114
                {
 
115
                        valencyChecker = new ValencyHybridChecker();
 
116
                        hAdder = new HydrogenAdder(valencyChecker);
 
117
                } catch (Exception exception)
 
118
                {
 
119
                        logger.error("Could not instantiate valencyChecker or hydrogenAdder: ",
 
120
                                        exception.getMessage());
 
121
                        logger.debug(exception);
 
122
                }
 
123
        }
 
124
 
 
125
        int position = -1;
 
126
        int nodeCounter = -1;
 
127
        String smiles = null;
 
128
        double bondStatus = -1;
 
129
        double bondStatusForRingClosure = 1;
 
130
    boolean bondIsAromatic = false;
 
131
        IAtom[] rings = null;
 
132
        double[] ringbonds = null;
 
133
        int thisRing = -1;
 
134
        IMolecule molecule = null;
 
135
        String currentSymbol = null;
 
136
 
 
137
        public IReaction parseReactionSmiles(String smiles) throws InvalidSmilesException
 
138
        {
 
139
                StringTokenizer tokenizer = new StringTokenizer(smiles, ">");
 
140
                String reactantSmiles = tokenizer.nextToken();
 
141
                String agentSmiles = "";
 
142
                String productSmiles = tokenizer.nextToken();
 
143
                if (tokenizer.hasMoreTokens())
 
144
                {
 
145
                        agentSmiles = productSmiles;
 
146
                        productSmiles = tokenizer.nextToken();
 
147
                }
 
148
 
 
149
                IReaction reaction = builder.newReaction();
 
150
 
 
151
                // add reactants
 
152
                IMolecule reactantContainer = parseSmiles(reactantSmiles);
 
153
                IMoleculeSet reactantSet = ConnectivityChecker.partitionIntoMolecules(reactantContainer);
 
154
                for (int i = 0; i < reactantSet.getAtomContainerCount(); i++)
 
155
                {
 
156
                        reaction.addReactant(reactantSet.getMolecule(i));
 
157
                }
 
158
 
 
159
                // add reactants
 
160
                if (agentSmiles.length() > 0)
 
161
                {
 
162
                        IMolecule agentContainer = parseSmiles(agentSmiles);
 
163
                        IMoleculeSet agentSet = ConnectivityChecker.partitionIntoMolecules(agentContainer);
 
164
                        for (int i = 0; i < agentSet.getAtomContainerCount(); i++)
 
165
                        {
 
166
                                reaction.addAgent(agentSet.getMolecule(i));
 
167
                        }
 
168
                }
 
169
 
 
170
                // add products
 
171
                IMolecule productContainer = parseSmiles(productSmiles);
 
172
                IMoleculeSet productSet = ConnectivityChecker.partitionIntoMolecules(productContainer);
 
173
                for (int i = 0; i < productSet.getAtomContainerCount(); i++)
 
174
                {
 
175
                        reaction.addProduct(productSet.getMolecule(i));
 
176
                }
 
177
 
 
178
                return reaction;
 
179
        }
 
180
 
 
181
 
 
182
        /**
 
183
         *  Parses a SMILES string and returns a Molecule object.
 
184
         *
 
185
         *@param  smiles                      A SMILES string
 
186
         *@return                             A Molecule representing the constitution
 
187
         *      given in the SMILES string
 
188
         *@exception  InvalidSmilesException  Exception thrown when the SMILES string
 
189
         *      is invalid
 
190
         */
 
191
        public IMolecule parseSmiles(String smiles) throws InvalidSmilesException {
 
192
                setInterrupted(false);
 
193
                
 
194
                DeduceBondSystemTool dbst=new DeduceBondSystemTool();
 
195
 
 
196
                IMolecule m2=this.parseString(smiles);
 
197
 
 
198
                IMolecule m=null;
 
199
 
 
200
                try {
 
201
                        m=(IMolecule)m2.clone();
 
202
 
 
203
                } catch (java.lang.CloneNotSupportedException exception) {
 
204
                        logger.debug(exception);
 
205
                }
 
206
 
 
207
                // add implicit hydrogens
 
208
                this.addImplicitHydrogens(m);
 
209
 
 
210
                // setup missing bond orders
 
211
                this.setupMissingBondOrders(m);
 
212
 
 
213
                // conceive aromatic perception
 
214
                this.conceiveAromaticPerception(m);
 
215
 
 
216
                boolean HaveSP2=false;
 
217
 
 
218
                for (int j=0;j<=m.getAtomCount()-1;j++) {
 
219
                        if (m.getAtom(j).getHybridization()==2) {
 
220
                                HaveSP2=true;
 
221
                                break;
 
222
                        }
 
223
                }
 
224
 
 
225
                if (HaveSP2) {  // have lower case (aromatic) element symbols that may need to be fixed
 
226
                        try {
 
227
                                dbst.setInterrupted(isInterrupted());
 
228
                                if (!(dbst.isOK(m))) {
 
229
 
 
230
                                        // need to fix it:
 
231
                                        m = (IMolecule) dbst.fixAromaticBondOrders(m2);
 
232
 
 
233
                                        if (!(m instanceof IMolecule)) {
 
234
                                                throw new InvalidSmilesException("Could not deduce aromatic bond orders.");
 
235
                                        }
 
236
                                } else {
 
237
                                        // doesnt need to fix aromatic bond orders
 
238
                                }
 
239
 
 
240
                        } catch (CDKException ex) {
 
241
                                throw new InvalidSmilesException(ex.getMessage(), ex);
 
242
                        }
 
243
                }
 
244
 
 
245
                return (IMolecule)m;
 
246
        }
 
247
 
 
248
        /**
 
249
         * This routine parses the smiles string into a molecule but does not add hydrogens, saturate, or perceive aromaticity
 
250
         * @param smiles
 
251
         * @return
 
252
         * @throws InvalidSmilesException
 
253
         */
 
254
        private IMolecule parseString(String smiles) throws InvalidSmilesException
 
255
        {
 
256
                logger.debug("parseSmiles()...");
 
257
                IBond bond = null;
 
258
                nodeCounter = 0;
 
259
                bondStatus = 0;
 
260
        bondIsAromatic = false;
 
261
                boolean bondExists = true;
 
262
                thisRing = -1;
 
263
                currentSymbol = null;
 
264
                molecule = builder.newMolecule();
 
265
                position = 0;
 
266
                // we don't want more than 1024 rings
 
267
                rings = new IAtom[1024];
 
268
                ringbonds = new double[1024];
 
269
                for (int f = 0; f < 1024; f++)
 
270
                {
 
271
                        rings[f] = null;
 
272
                        ringbonds[f] = -1;
 
273
                }
 
274
 
 
275
                char mychar = 'X';
 
276
                char[] chars = new char[1];
 
277
                IAtom lastNode = null;
 
278
                Stack atomStack = new Stack();
 
279
                Stack bondStack = new Stack();
 
280
                IAtom atom = null;
 
281
                do
 
282
                {
 
283
                        try
 
284
                        {
 
285
                                mychar = smiles.charAt(position);
 
286
                                logger.debug("");
 
287
                                logger.debug("Processing: " + mychar);
 
288
                                if (lastNode != null)
 
289
                                {
 
290
                                        logger.debug("Lastnode: ", lastNode.hashCode());
 
291
                                }
 
292
                                if ((mychar >= 'A' && mychar <= 'Z') || (mychar >= 'a' && mychar <= 'z') ||
 
293
                                                (mychar == '*'))
 
294
                                {
 
295
                                        status = 1;
 
296
                                        logger.debug("Found a must-be 'organic subset' element");
 
297
                                        // only 'organic subset' elements allowed
 
298
                                        atom = null;
 
299
                                        if (mychar == '*')
 
300
                                        {
 
301
                                                currentSymbol = "*";
 
302
                                                atom = builder.newPseudoAtom("*");
 
303
                                        } else
 
304
                                        {
 
305
                                                currentSymbol = getSymbolForOrganicSubsetElement(smiles, position);
 
306
                                                if (currentSymbol != null)
 
307
                                                {
 
308
                                                        if (currentSymbol.length() == 1)
 
309
                                                        {
 
310
                                                                if (!(currentSymbol.toUpperCase()).equals(currentSymbol))
 
311
                                                                {
 
312
                                                                        currentSymbol = currentSymbol.toUpperCase();
 
313
                                                                        atom = builder.newAtom(currentSymbol);
 
314
                                                                        atom.setHybridization(CDKConstants.HYBRIDIZATION_SP2);
 
315
                                                                } else
 
316
                                                                {
 
317
                                                                        atom = builder.newAtom(currentSymbol);
 
318
                                                                }
 
319
                                                        } else
 
320
                                                        {
 
321
                                                                atom = builder.newAtom(currentSymbol);
 
322
                                                        }
 
323
                                                        logger.debug("Made atom: ", atom);
 
324
                                                } else
 
325
                                                {
 
326
                                                        throw new InvalidSmilesException(
 
327
                                                                        "Found element which is not a 'organic subset' element. You must " +
 
328
                                                                        "use [" + mychar + "].");
 
329
                                                }
 
330
                                        }
 
331
 
 
332
                                        molecule.addAtom(atom);
 
333
                                        logger.debug("Adding atom ", atom.hashCode());
 
334
                                        if ((lastNode != null) && bondExists)
 
335
                                        {
 
336
                                                logger.debug("Creating bond between ", atom.getSymbol(), " and ", lastNode.getSymbol());
 
337
                                                bond = builder.newBond(atom, lastNode, bondStatus);
 
338
                                                            if (bondIsAromatic) {
 
339
                            bond.setFlag(CDKConstants.ISAROMATIC, true);
 
340
                        }
 
341
                                                molecule.addBond(bond);
 
342
                                        }
 
343
                                        bondStatus = CDKConstants.BONDORDER_SINGLE;
 
344
                                        lastNode = atom;
 
345
                                        nodeCounter++;
 
346
                                        position = position + currentSymbol.length();
 
347
                                        bondExists = true;
 
348
                    bondIsAromatic = false;
 
349
                                } else if (mychar == '=')
 
350
                                {
 
351
                                        position++;
 
352
                                        if (status == 2 || !((smiles.charAt(position) >= '0' && smiles.charAt(position) <= '9') || smiles.charAt(position) == '%'))
 
353
                                        {
 
354
                                                bondStatus = CDKConstants.BONDORDER_DOUBLE;
 
355
                                        } else
 
356
                                        {
 
357
                                                bondStatusForRingClosure = CDKConstants.BONDORDER_DOUBLE;
 
358
                                        }
 
359
                                } else if (mychar == '#')
 
360
                                {
 
361
                                        position++;
 
362
                                        if (status == 2 || !((smiles.charAt(position) >= '0' && smiles.charAt(position) <= '9') || smiles.charAt(position) == '%'))
 
363
                                        {
 
364
                                                bondStatus = CDKConstants.BONDORDER_TRIPLE;
 
365
                                        } else
 
366
                                        {
 
367
                                                bondStatusForRingClosure = CDKConstants.BONDORDER_TRIPLE;
 
368
                                        }
 
369
                                } else if (mychar == '(')
 
370
                                {
 
371
                                        atomStack.push(lastNode);
 
372
                                        logger.debug("Stack:");
 
373
                                        Enumeration ses = atomStack.elements();
 
374
                                        while (ses.hasMoreElements())
 
375
                                        {
 
376
                                                IAtom a = (IAtom) ses.nextElement();
 
377
                                                logger.debug("", a.hashCode());
 
378
                                        }
 
379
                                        logger.debug("------");
 
380
                                        bondStack.push(new Double(bondStatus));
 
381
                                        position++;
 
382
                                } else if (mychar == ')')
 
383
                                {
 
384
                                        lastNode = (IAtom) atomStack.pop();
 
385
                                        logger.debug("Stack:");
 
386
                                        Enumeration ses = atomStack.elements();
 
387
                                        while (ses.hasMoreElements())
 
388
                                        {
 
389
                                                IAtom a = (IAtom) ses.nextElement();
 
390
                                                logger.debug("", a.hashCode());
 
391
                                        }
 
392
                                        logger.debug("------");
 
393
                                        bondStatus = ((Double) bondStack.pop()).doubleValue();
 
394
                                        position++;
 
395
                                } else if (mychar >= '0' && mychar <= '9')
 
396
                                {
 
397
                                        status = 2;
 
398
                                        chars[0] = mychar;
 
399
                                        currentSymbol = new String(chars);
 
400
                                        thisRing = (new Integer(currentSymbol)).intValue();
 
401
                                        handleRing(lastNode);
 
402
                                        position++;
 
403
                                } else if (mychar == '%')
 
404
                                {
 
405
                                        currentSymbol = getRingNumber(smiles, position);
 
406
                                        thisRing = (new Integer(currentSymbol)).intValue();
 
407
                                        handleRing(lastNode);
 
408
                                        position += currentSymbol.length() + 1;
 
409
                                } else if (mychar == '[')
 
410
                                {
 
411
                                        currentSymbol = getAtomString(smiles, position);
 
412
                                        atom = assembleAtom(currentSymbol);
 
413
                                        molecule.addAtom(atom);
 
414
                                        logger.debug("Added atom: ", atom);
 
415
                                        if (lastNode != null && bondExists)
 
416
                                        {
 
417
                                                bond = builder.newBond(atom, lastNode, bondStatus);
 
418
                                                            if (bondIsAromatic) {
 
419
                            bond.setFlag(CDKConstants.ISAROMATIC, true);
 
420
                        }
 
421
                                                molecule.addBond(bond);
 
422
                                                logger.debug("Added bond: ", bond);
 
423
                                        }
 
424
                                        bondStatus = CDKConstants.BONDORDER_SINGLE;
 
425
                    bondIsAromatic = false;
 
426
                                        lastNode = atom;
 
427
                                        nodeCounter++;
 
428
                                        position = position + currentSymbol.length() + 2;
 
429
                                        // plus two for [ and ]
 
430
                                        bondExists = true;
 
431
                                } else if (mychar == '.')
 
432
                                {
 
433
                                        bondExists = false;
 
434
                                        position++;
 
435
                                } else if (mychar == '-')
 
436
                                {
 
437
                                        bondExists = true;
 
438
                                        // a simple single bond
 
439
                                        position++;
 
440
                } else if (mychar == ':') {
 
441
                    bondExists = true;
 
442
                    bondIsAromatic = true;
 
443
                    position++;
 
444
                                } else if (mychar == '/' || mychar == '\\')
 
445
                                {
 
446
                                        logger.warn("Ignoring stereo information for double bond");
 
447
                                        position++;
 
448
                                } else if (mychar == '@')
 
449
                                {
 
450
                                        if (position < smiles.length() - 1 && smiles.charAt(position + 1) == '@')
 
451
                                        {
 
452
                                                position++;
 
453
                                        }
 
454
                                        logger.warn("Ignoring stereo information for atom");
 
455
                                        position++;
 
456
                                } else
 
457
                                {
 
458
                                        throw new InvalidSmilesException("Unexpected character found: " + mychar);
 
459
                                }
 
460
                        } catch (InvalidSmilesException exc)
 
461
                        {
 
462
                                logger.error("InvalidSmilesException while parsing char (in parseSmiles()): " + mychar);
 
463
                                logger.debug(exc);
 
464
                                throw exc;
 
465
                        } catch (Exception exception)
 
466
                        {
 
467
                                logger.error("Error while parsing char: " + mychar);
 
468
                                logger.debug(exception);
 
469
                                throw new InvalidSmilesException("Error while parsing char: " + mychar, exception);
 
470
                        }
 
471
                        logger.debug("Parsing next char");
 
472
                } while (position < smiles.length());
 
473
 
 
474
                return molecule;
 
475
        }
 
476
 
 
477
        private String getAtomString(String smiles, int pos) throws InvalidSmilesException
 
478
        {
 
479
                logger.debug("getAtomString()");
 
480
                StringBuffer atomString = new StringBuffer();
 
481
                try
 
482
                {
 
483
                        for (int f = pos + 1; f < smiles.length(); f++)
 
484
                        {
 
485
                                char character = smiles.charAt(f);
 
486
                                if (character == ']')
 
487
                                {
 
488
                                        break;
 
489
                                } else
 
490
                                {
 
491
                                        atomString.append(character);
 
492
                                }
 
493
                        }
 
494
                } catch (Exception exception)
 
495
                {
 
496
                        String message = "Problem parsing Atom specification given in brackets.\n";
 
497
                        message += "Invalid SMILES string was: " + smiles;
 
498
                        logger.error(message);
 
499
                        logger.debug(exception);
 
500
                        throw new InvalidSmilesException(message, exception);
 
501
                }
 
502
                return atomString.toString();
 
503
        }
 
504
 
 
505
        private int getCharge(String chargeString, int position)
 
506
        {
 
507
                logger.debug("getCharge(): Parsing charge from: ", chargeString.substring(position));
 
508
                int charge = 0;
 
509
                if (chargeString.charAt(position) == '+')
 
510
                {
 
511
                        charge = +1;
 
512
                        position++;
 
513
                } else if (chargeString.charAt(position) == '-')
 
514
                {
 
515
                        charge = -1;
 
516
                        position++;
 
517
                } else
 
518
                {
 
519
                        return charge;
 
520
                }
 
521
                StringBuffer multiplier = new StringBuffer();
 
522
                while (position < chargeString.length() && Character.isDigit(chargeString.charAt(position)))
 
523
                {
 
524
                        multiplier.append(chargeString.charAt(position));
 
525
                        position++;
 
526
                }
 
527
                if (multiplier.length() > 0)
 
528
                {
 
529
                        logger.debug("Found multiplier: ", multiplier);
 
530
                        try
 
531
                        {
 
532
                                charge = charge * Integer.parseInt(multiplier.toString());
 
533
                        } catch (Exception exception)
 
534
                        {
 
535
                                logger.error("Could not parse positive atomic charge!");
 
536
                                logger.debug(exception);
 
537
                        }
 
538
                }
 
539
                logger.debug("Found charge: ", charge);
 
540
                return charge;
 
541
        }
 
542
 
 
543
        private int getImplicitHydrogenCount(String s, int position)
 
544
        {
 
545
                logger.debug("getImplicitHydrogenCount(): Parsing implicit hydrogens from: " + s);
 
546
                int count = 1;
 
547
                if (s.charAt(position) == 'H')
 
548
                {
 
549
                        StringBuffer multiplier = new StringBuffer();
 
550
                        while (position < (s.length() - 1) && Character.isDigit(s.charAt(position + 1)))
 
551
                        {
 
552
                                multiplier.append(position + 1);
 
553
                                position++;
 
554
                        }
 
555
                        if (multiplier.length() > 0)
 
556
                        {
 
557
                                try
 
558
                                {
 
559
                                        count = count + Integer.parseInt(multiplier.toString());
 
560
                                } catch (Exception exception)
 
561
                                {
 
562
                                        logger.error("Could not parse number of implicit hydrogens!");
 
563
                                        logger.debug(exception);
 
564
                                }
 
565
                        }
 
566
                }
 
567
                return count;
 
568
        }
 
569
 
 
570
        private String getElementSymbol(String s, int pos)
 
571
        {
 
572
                logger.debug("getElementSymbol(): Parsing element symbol (pos=" + pos + ") from: " + s);
 
573
                // try to match elements not in the organic subset.
 
574
                // first, the two char elements
 
575
                if (pos < s.length() - 1)
 
576
                {
 
577
                        String possibleSymbol = s.substring(pos, pos + 2);
 
578
                        logger.debug("possibleSymbol: ", possibleSymbol);
 
579
                        if (("HeLiBeNeNaMgAlSiClArCaScTiCrMnFeCoNiCuZnGaGeAsSe".indexOf(possibleSymbol) >= 0) ||
 
580
                                        ("BrKrRbSrZrNbMoTcRuRhPdAgCdInSnSbTeXeCsBaLuHfTaRe".indexOf(possibleSymbol) >= 0) ||
 
581
                                        ("OsIrPtAuHgTlPbBiPoAtRnFrRaLrRfDbSgBhHsMtDs".indexOf(possibleSymbol) >= 0))
 
582
                        {
 
583
                                return possibleSymbol;
 
584
                        }
 
585
                }
 
586
                // if that fails, the one char elements
 
587
                String possibleSymbol = s.substring(pos, pos + 1);
 
588
                logger.debug("possibleSymbol: ", possibleSymbol);
 
589
                if (("HKUVY".indexOf(possibleSymbol) >= 0))
 
590
                {
 
591
                        return possibleSymbol;
 
592
                }
 
593
                // if that failed too, then possibly a organic subset element
 
594
                return getSymbolForOrganicSubsetElement(s, pos);
 
595
        }
 
596
 
 
597
 
 
598
        /**
 
599
         *  Gets the ElementSymbol for an element in the 'organic subset' for which
 
600
         *  brackets may be omited. <p>
 
601
         *
 
602
         *  See: <a href="http://www.daylight.com/dayhtml/smiles/smiles-atoms.html">
 
603
         *  http://www.daylight.com/dayhtml/smiles/smiles-atoms.html</a> .
 
604
         */
 
605
        private String getSymbolForOrganicSubsetElement(String s, int pos)
 
606
        {
 
607
                logger.debug("getSymbolForOrganicSubsetElement(): Parsing organic subset element from: ", s);
 
608
                if (pos < s.length() - 1)
 
609
                {
 
610
                        String possibleSymbol = s.substring(pos, pos + 2);
 
611
                        if (("ClBr".indexOf(possibleSymbol) >= 0))
 
612
                        {
 
613
                                return possibleSymbol;
 
614
                        }
 
615
                }
 
616
                if ("BCcNnOoFPSsI".indexOf((s.charAt(pos))) >= 0)
 
617
                {
 
618
                        return s.substring(pos, pos + 1);
 
619
                }
 
620
                if ("fpi".indexOf((s.charAt(pos))) >= 0)
 
621
                {
 
622
                        logger.warn("Element ", s, " is normally not sp2 hybridisized!");
 
623
                        return s.substring(pos, pos + 1);
 
624
                }
 
625
                logger.warn("Subset element not found!");
 
626
                return null;
 
627
        }
 
628
 
 
629
 
 
630
        /**
 
631
         *  Gets the RingNumber attribute of the SmilesParser object
 
632
         */
 
633
        private String getRingNumber(String s, int pos) throws InvalidSmilesException {
 
634
                logger.debug("getRingNumber()");
 
635
                pos++;
 
636
 
 
637
                // Two digits impossible due to end of string
 
638
                if (pos >= s.length() - 1)
 
639
                        throw new InvalidSmilesException("Percent sign ring closure numbers must be two-digit.");
 
640
 
 
641
                String retString = s.substring(pos, pos + 2);
 
642
 
 
643
                if (retString.charAt(0) < '0' || retString.charAt(0) > '9' || 
 
644
                        retString.charAt(1) < '0' || retString.charAt(1) > '9')
 
645
                        throw new InvalidSmilesException("Percent sign ring closure numbers must be two-digit.");
 
646
 
 
647
                return retString;
 
648
        }
 
649
 
 
650
        private IAtom assembleAtom(String s) throws InvalidSmilesException
 
651
        {
 
652
                logger.debug("assembleAtom(): Assembling atom from: ", s);
 
653
                IAtom atom = null;
 
654
                int position = 0;
 
655
                String currentSymbol = null;
 
656
                StringBuffer isotopicNumber = new StringBuffer();
 
657
                char mychar;
 
658
                logger.debug("Parse everythings before and including element symbol");
 
659
                do
 
660
                {
 
661
                        try
 
662
                        {
 
663
                                mychar = s.charAt(position);
 
664
                                logger.debug("Parsing char: " + mychar);
 
665
                                if ((mychar >= 'A' && mychar <= 'Z') || (mychar >= 'a' && mychar <= 'z'))
 
666
                                {
 
667
                                        currentSymbol = getElementSymbol(s, position);
 
668
                                        if (currentSymbol == null)
 
669
                                        {
 
670
                                                throw new InvalidSmilesException(
 
671
                                                                "Expected element symbol, found null!"
 
672
                                                                );
 
673
                                        } else
 
674
                                        {
 
675
                                                logger.debug("Found element symbol: ", currentSymbol);
 
676
                                                position = position + currentSymbol.length();
 
677
                                                if (currentSymbol.length() == 1)
 
678
                                                {
 
679
                                                        if (!(currentSymbol.toUpperCase()).equals(currentSymbol))
 
680
                                                        {
 
681
                                                                currentSymbol = currentSymbol.toUpperCase();
 
682
                                                                atom = builder.newAtom(currentSymbol);
 
683
                                                                atom.setHybridization(CDKConstants.HYBRIDIZATION_SP2);
 
684
                                                                if (atom.getHydrogenCount() > 0)
 
685
                                                                {
 
686
                                                                        atom.setHydrogenCount(atom.getHydrogenCount() - 1);
 
687
                                                                }
 
688
                                                        } else
 
689
                                                        {
 
690
                                                                atom = builder.newAtom(currentSymbol);
 
691
                                                        }
 
692
                                                } else
 
693
                                                {
 
694
                                                        atom = builder.newAtom(currentSymbol);
 
695
                                                }
 
696
                                                logger.debug("Made atom: ", atom);
 
697
                                        }
 
698
                                        break;
 
699
                                } else if (mychar >= '0' && mychar <= '9')
 
700
                                {
 
701
                                        isotopicNumber.append(mychar);
 
702
                                        position++;
 
703
                                } else if (mychar == '*')
 
704
                                {
 
705
                                        currentSymbol = "*";
 
706
                                        atom = builder.newPseudoAtom(currentSymbol);
 
707
                                        logger.debug("Made atom: ", atom);
 
708
                                        position++;
 
709
                                        break;
 
710
                                } else
 
711
                                {
 
712
                                        throw new InvalidSmilesException("Found unexpected char: " + mychar);
 
713
                                }
 
714
                        } catch (InvalidSmilesException exc)
 
715
                        {
 
716
                                logger.error("InvalidSmilesException while parsing atom string: " + s);
 
717
                                logger.debug(exc);
 
718
                                throw exc;
 
719
                        } catch (Exception exception)
 
720
                        {
 
721
                                logger.error("Could not parse atom string: ", s);
 
722
                                logger.debug(exception);
 
723
                                throw new InvalidSmilesException("Could not parse atom string: " + s, exception);
 
724
                        }
 
725
                } while (position < s.length());
 
726
                if (isotopicNumber.toString().length() > 0)
 
727
                {
 
728
                        try
 
729
                        {
 
730
                                atom.setMassNumber(Integer.parseInt(isotopicNumber.toString()));
 
731
                        } catch (Exception exception)
 
732
                        {
 
733
                                logger.error("Could not set atom's atom number.");
 
734
                                logger.debug(exception);
 
735
                        }
 
736
                }
 
737
                logger.debug("Parsing part after element symbol (like charge): ", s.substring(position));
 
738
                int charge = 0;
 
739
                int implicitHydrogens = 0;
 
740
                while (position < s.length())
 
741
                {
 
742
                        try
 
743
                        {
 
744
                                mychar = s.charAt(position);
 
745
                                logger.debug("Parsing char: " + mychar);
 
746
                                if (mychar == 'H')
 
747
                                {
 
748
                                        // count implicit hydrogens
 
749
                                        implicitHydrogens = getImplicitHydrogenCount(s, position);
 
750
                                        position++;
 
751
                                        if (implicitHydrogens > 1)
 
752
                                        {
 
753
                                                position++;
 
754
                                        }
 
755
                                        atom.setHydrogenCount(implicitHydrogens);
 
756
                                } else if (mychar == '+' || mychar == '-')
 
757
                                {
 
758
                                        charge = getCharge(s, position);
 
759
                                        position++;
 
760
                                        if (charge < -1 || charge > 1)
 
761
                                        {
 
762
                                                position++;
 
763
                                        }
 
764
                                        atom.setFormalCharge(charge);
 
765
                                } else if (mychar == '@')
 
766
                                {
 
767
                                        if (position < s.length() - 1 && s.charAt(position + 1) == '@')
 
768
                                        {
 
769
                                                position++;
 
770
                                        }
 
771
                                        logger.warn("Ignoring stereo information for atom");
 
772
                                        position++;
 
773
                                } else
 
774
                                {
 
775
                                        throw new InvalidSmilesException("Found unexpected char: " + mychar);
 
776
                                }
 
777
                        } catch (InvalidSmilesException exc)
 
778
                        {
 
779
                                logger.error("InvalidSmilesException while parsing atom string: ", s);
 
780
                                logger.debug(exc);
 
781
                                throw exc;
 
782
                        } catch (Exception exception)
 
783
                        {
 
784
                                logger.error("Could not parse atom string: ", s);
 
785
                                logger.debug(exception);
 
786
                                throw new InvalidSmilesException("Could not parse atom string: " + s, exception);
 
787
                        }
 
788
                }
 
789
                return atom;
 
790
        }
 
791
 
 
792
 
 
793
        /**
 
794
         *  We call this method when a ring (depicted by a number) has been found.
 
795
         */
 
796
        private void handleRing(IAtom atom)
 
797
        {
 
798
                logger.debug("handleRing():");
 
799
                double bondStat = bondStatusForRingClosure;
 
800
                if (ringbonds[thisRing] > bondStat)
 
801
                        bondStat = ringbonds[thisRing];
 
802
                IBond bond = null;
 
803
                IAtom partner = null;
 
804
                IAtom thisNode = rings[thisRing];
 
805
                // lookup
 
806
                if (thisNode != null)
 
807
                {
 
808
                        partner = thisNode;
 
809
                        bond = builder.newBond(atom, partner, bondStat);
 
810
                              if (bondIsAromatic) {
 
811
                
 
812
                bond.setFlag(CDKConstants.ISAROMATIC, true);
 
813
            }
 
814
                        molecule.addBond(bond);
 
815
            bondIsAromatic = false;
 
816
                        rings[thisRing] = null;
 
817
                        ringbonds[thisRing] = -1;
 
818
 
 
819
                } else
 
820
                {
 
821
                        /*
 
822
                         *  First occurence of this ring:
 
823
                         *  - add current atom to list
 
824
                         */
 
825
                        rings[thisRing] = atom;
 
826
                        ringbonds[thisRing] = bondStatusForRingClosure;
 
827
                }
 
828
                bondStatusForRingClosure = 1;
 
829
        }
 
830
 
 
831
        private void addImplicitHydrogens(IMolecule m) {
 
832
                try {
 
833
                        logger.debug("before H-adding: ", m);
 
834
                        hAdder.addImplicitHydrogensToSatisfyValency(m);
 
835
                        logger.debug("after H-adding: ", m);
 
836
                } catch (Exception exception) {
 
837
                        logger.error("Error while calculation Hcount for SMILES atom: ", exception.getMessage());
 
838
                }
 
839
        }
 
840
 
 
841
        private void setupMissingBondOrders(IMolecule m) {
 
842
                try {
 
843
                        valencyChecker.saturate(m);
 
844
                        logger.debug("after adding missing bond orders: ", m);
 
845
                } catch (Exception exception) {
 
846
                        logger.error("Error while calculation Hcount for SMILES atom: ", exception.getMessage());
 
847
                }
 
848
        }
 
849
 
 
850
        private void conceiveAromaticPerception(IMolecule m) {
 
851
                IMoleculeSet moleculeSet = ConnectivityChecker.partitionIntoMolecules(m);
 
852
                logger.debug("#mols ", moleculeSet.getAtomContainerCount());
 
853
                for (int i = 0; i < moleculeSet.getAtomContainerCount(); i++) {
 
854
                        IAtomContainer molecule = moleculeSet.getAtomContainer(i);
 
855
                        logger.debug("mol: ", molecule);
 
856
                        try {
 
857
                                valencyChecker.saturate(molecule);
 
858
                                logger.debug(" after saturation: ", molecule);
 
859
                                if (HueckelAromaticityDetector
 
860
                                                .detectAromaticity(molecule)) {
 
861
                                        logger.debug("Structure is aromatic...");
 
862
                                }
 
863
                        } catch (Exception exception) {
 
864
                                logger.error("Could not perceive aromaticity: ", exception
 
865
                                                .getMessage());
 
866
                                logger.debug(exception);
 
867
                        }
 
868
                }
 
869
        }
 
870
        
 
871
        public boolean isInterrupted() {
 
872
                return valencyChecker.isInterrupted();
 
873
        }
 
874
 
 
875
        public void setInterrupted(boolean interrupted) {
 
876
                valencyChecker.setInterrupted(interrupted);
 
877
        }
 
878
        
 
879
}
 
880