1
/* $Revision: 8067 $ $Author: egonw $ $Date: 2007-03-09 14:59:01 +0100 (Fri, 09 Mar 2007) $
3
* Copyright (C) 1997-2007 The Chemistry Development Kit (CDK) project
5
* Contact: cdk-devel@lists.sourceforge.net
7
* This program is free software; you can redistribute it and/or
8
* modify it under the terms of the GNU Lesser General Public License
9
* as published by the Free Software Foundation; either version 2.1
10
* of the License, or (at your option) any later version.
11
* All I ask is that proper credit is given for my work, which includes
12
* - but is not limited to - adding the above copyright notice to the beginning
13
* of your source code files, and to any copyright notice that you may distribute
14
* with programs based on this work.
16
* This program is distributed in the hope that it will be useful,
17
* but WITHOUT ANY WARRANTY; without even the implied warranty of
18
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
* GNU Lesser General Public License for more details.
21
* You should have received a copy of the GNU Lesser General Public License
22
* along with this program; if not, write to the Free Software
23
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
25
package org.openscience.cdk.tools;
27
import java.util.Collections;
28
import java.util.Comparator;
29
import java.util.List;
30
import java.util.ArrayList;
32
import org.openscience.cdk.CDKConstants;
33
import org.openscience.cdk.ChemObject;
34
import org.openscience.cdk.Molecule;
35
import org.openscience.cdk.config.IsotopeFactory;
36
import org.openscience.cdk.exception.CDKException;
37
import org.openscience.cdk.graph.invariant.CanonicalLabeler;
38
import org.openscience.cdk.interfaces.IAtom;
39
import org.openscience.cdk.interfaces.IAtomContainer;
40
import org.openscience.cdk.interfaces.IIsotope;
41
import org.openscience.cdk.interfaces.IRing;
42
import org.openscience.cdk.interfaces.IRingSet;
43
import org.openscience.cdk.ringsearch.SSSRFinder;
46
* Generates HOSE codes {@cdk.cite BRE78}.
47
* IMPORTANT: Your molecule must contain implicit or explicit hydrogens
48
* for this method to work properly
51
* @cdk.keyword HOSE code, spherical atom search
52
* @cdk.created 2002-05-10
54
public class HOSECodeGenerator implements java.io.Serializable
57
private LoggingTool logger;
59
private static final long serialVersionUID = -4353471818831864513L;
62
* Container for the nodes in a sphere.
64
protected List sphereNodes = null;
65
protected List sphereNodesWithAtoms = null;
68
* Container for the node in the next sphere Assembled in a recursive method
69
* and then passed to the next recursion to become "sphereNodes"
71
protected List nextSphereNodes = null;
73
* Counter for the sphere in which we currently work
75
protected int sphere = 0;
77
* How many spheres are we supposed inspect.
79
protected int maxSphere = 0;
82
* Here we store the spheres that we assemble, in order to parse them into a
85
protected List[] spheres = null;
86
protected List[] spheresWithAtoms = null;
89
* The HOSECode string that we assemble
91
protected StringBuffer HOSECode = null;
94
* The molecular structure on which we work
96
protected IAtomContainer atomContainer;
99
* Delimiters used to separate spheres in the output string. Bremser uses the
100
* sequence"(//)" for the first four spheres.
102
protected String[] sphereDelimiters =
104
"(", "/", "/", ")", "/", "/", "/", "/", "/", "/", "/", "/"
107
* The bond symbols used for bond orders "single", "double", "triple" and
110
protected String bondSymbols[] =
112
"", "", "=", "%", "*"
115
protected String centerCode = null;
117
public TreeNode rootNode = null;
120
boolean debug = false;
122
private IAtomContainer acold=null;
123
private IRingSet soar=null;
126
* The rank order for the given element symbols
129
static final String[] rankedSymbols =
131
"C", "O", "N", "S", "P", "Si", "B", "F", "Cl", "Br", ";", "I", "#", "&", ","
135
* The ranking values to be used for the symbols above
137
static final int[] symbolRankings =
139
9000, 8900, 8800, 8700,
140
8600, 8500, 8400, 8300, 8200, 8100, 8000, 7900, 1200, 1100, 1000
144
* The bond rankings to be used for the four bond order possibilities
147
static final int[] bondRankings =
149
0, 0, 200000, 300000, 100000
154
* Constructor for the HOSECodeGenerator
156
public HOSECodeGenerator()
158
logger = new LoggingTool(this);
160
sphereNodes = new ArrayList();
161
sphereNodesWithAtoms = new ArrayList();
162
nextSphereNodes = new ArrayList();
163
HOSECode = new StringBuffer();
168
* This method is intended to be used to get the atoms around an atom in spheres. It is not used in this class, but is provided for other classes to use.
169
* It also creates the HOSE code in HOSECode as a side-effect
171
*@param ac The AtomContainer with the molecular skeleton in which the root atom resides.
172
*@param root The root atom for which to produce the spheres.
173
*@param noOfSpheres The number of spheres to look at.
174
*@paramm ringsize Shall the center code have the ring size in it? Only use if you want to have the hose code later, else say false.
175
*@return An array of Vectors. The vector at i-1 contains the atoms at sphere i as TreeNodes.
177
public List[] getSpheres(Molecule ac, IAtom root, int noOfSpheres, boolean ringsize) throws org.openscience.cdk.exception.CDKException
180
this.atomContainer = ac;
181
maxSphere = noOfSpheres;
182
spheres = new List[noOfSpheres + 1];
183
spheresWithAtoms = new List[noOfSpheres + 1];
184
for (int i = 0; i < ac.getAtomCount(); i++)
186
ac.getAtom(i).setFlag(CDKConstants.VISITED, false);
188
root.setFlag(CDKConstants.VISITED, true);
189
rootNode = new TreeNode(root.getSymbol(), null, root, (double)0, atomContainer.getConnectedBondsCount(root), 0);
191
* All we need to observe is how the ranking of substituents
192
* in the subsequent spheres of the root nodes influences the
193
* ranking of the first sphere, sinces the order of a node in a sphere
194
* depends on the order the preceding node in its branch
196
HOSECode = new StringBuffer();
197
createCenterCode(root,ac,ringsize);
198
breadthFirstSearch(root, false);
200
fillUpSphereDelimiters();
201
logger.debug("HOSECodeGenerator -> HOSECode: " + HOSECode.toString());
202
return spheresWithAtoms;
207
* Produces a HOSE code for Atom 'root' in the AtomContainer 'ac'. The HOSE
208
* code is produced for the number of spheres given by noOfSpheres
209
* IMPORTANT: if you want aromaticity to be included in the code, you need
210
* to run the AtomContainer ac to the HueckelAromaticityDetector prior to
211
* using getHOSECode(). This method only gives proper results if the molecule is
212
* fully saturated (if not, the order of the HOSE code might depend on atoms in higher spheres).
213
* This method is known to fail for protons sometimes.
214
* IMPORTANT: Your molecule must contain implicit or explicit hydrogens
215
* for this method to work properly
217
*@param ac The AtomContainer with the molecular skeleton in which the root atom resides
218
*@param root The root atom for which to produce the HOSE code
219
*@param noOfSpheres The number of spheres to look at
220
*@return The HOSECode value
221
*@exception org.openscience.cdk.exception.CDKException Thrown if something is wrong
223
public String getHOSECode(IAtomContainer ac, IAtom root, int noOfSpheres) throws org.openscience.cdk.exception.CDKException
225
return getHOSECode(ac,root,noOfSpheres, false);
230
* Produces a HOSE code for Atom 'root' in the AtomContainer 'ac'. The HOSE
231
* code is produced for the number of spheres given by noOfSpheres
232
* IMPORTANT: if you want aromaticity to be included in the code, you need
233
* to run the AtomContainer ac to the HueckelAromaticityDetector prior to
234
* using getHOSECode(). This method only gives proper results if the molecule is
235
* fully saturated (if not, the order of the HOSE code might depend on atoms in higher spheres).
236
* This method is known to fail for protons sometimes.
238
*@param ac The AtomContainer with the molecular skeleton in which the root atom resides
239
*@param root The root atom for which to produce the HOSE code
240
*@param noOfSpheres The number of spheres to look at
241
*@param ringsize The size of the ring(s) it is in is included in center atom code
242
*@return The HOSECode value
243
*@exception org.openscience.cdk.exception.CDKException Thrown if something is wrong
245
public String getHOSECode(IAtomContainer ac, IAtom root, int noOfSpheres, boolean ringsize) throws org.openscience.cdk.exception.CDKException
247
CanonicalLabeler canLabler = new CanonicalLabeler();
248
canLabler.canonLabel(ac);
250
this.atomContainer = ac;
251
maxSphere = noOfSpheres;
252
spheres = new List[noOfSpheres + 1];
253
for (int i = 0; i < ac.getAtomCount(); i++)
255
ac.getAtom(i).setFlag(CDKConstants.VISITED, false);
257
root.setFlag(CDKConstants.VISITED, true);
258
rootNode = new TreeNode(root.getSymbol(), null, root, (double)0, atomContainer.getConnectedBondsCount(root), 0);
260
* All we need to observe is how the ranking of substituents
261
* in the subsequent spheres of the root nodes influences the
262
* ranking of the first sphere, sinces the order of a node in a sphere
263
* depends on the order the preceding node in its branch
265
HOSECode = new StringBuffer();
266
createCenterCode(root, ac, ringsize);
267
breadthFirstSearch(root,true);
269
fillUpSphereDelimiters();
270
logger.debug("HOSECodeGenerator -> HOSECode: ", HOSECode);
271
return HOSECode.toString();
274
private void createCenterCode(IAtom root, IAtomContainer ac, boolean ringsize)
276
int partnerCount = 0;
277
partnerCount = atomContainer.getConnectedBondsCount(root) + root.getHydrogenCount();
278
centerCode = root.getSymbol() + "-" + partnerCount + createChargeCode(root)+(ringsize ? getRingcode(root, ac) : "" )+";";
282
private String getRingcode(IAtom root, IAtomContainer ac){
284
soar=new SSSRFinder(ac).findSSSR();
286
boolean[] bool=new boolean[1000];
287
StringBuffer sb=new StringBuffer();
288
for(int i=0;i<soar.getRings(root).getAtomContainerCount();i++){
289
if(((IRing)soar.getRings(root).getAtomContainer(i)).getAtomCount()<bool.length)
290
bool[((IRing)soar.getRings(root).getAtomContainer(i)).getAtomCount()]=true;
292
for(int i=0;i<bool.length;i++){
296
if(sb.toString().equals(""))
299
return "-"+sb.toString();
302
private String createChargeCode(IAtom atom){
303
StringBuffer tempCode=new StringBuffer();
304
if (atom != null && atom.getFormalCharge()!=0){
305
if(Math.abs(atom.getFormalCharge())==1){
306
if(atom.getFormalCharge()<0)
307
tempCode.append("-");
309
tempCode.append("+");
311
tempCode.append("'");
312
if(atom.getFormalCharge()>0)
313
tempCode.append("+");
314
tempCode.append(atom.getFormalCharge()+"'");
321
* Prepares for a breadth first search within the AtomContainer. The actual
322
* recursion is done in nextSphere()
324
*@param root The atom at which we start the search
325
*@exception org.openscience.cdk.exception.CDKException If something goes wrong.
327
private void breadthFirstSearch(IAtom root,boolean addTreeNode) throws org.openscience.cdk.exception.CDKException
330
TreeNode tempNode = null;
331
java.util.List conAtoms = atomContainer.getConnectedAtomsList(root);
333
org.openscience.cdk.interfaces.IBond bond = null;
335
sphereNodesWithAtoms.clear();
336
for (int i = 0; i < conAtoms.size(); i++){
338
atom = (IAtom)conAtoms.get(i);
339
if(atom.getSymbol().equals("H"))
341
bond = atomContainer.getBond(root, atom);
343
* In the first sphere the atoms are labled with
344
* their own atom atom as source
346
if (bond.getFlag(CDKConstants.ISAROMATIC))
348
tempNode = new TreeNode(atom.getSymbol(), new TreeNode(root.getSymbol(), null, root, (double) 0, 0, (long) 0), atom, 4, atomContainer.getConnectedBondsCount(atom), 0);
351
tempNode = new TreeNode(atom.getSymbol(), new TreeNode(root.getSymbol(), null, root, (double) 0, 0, (long) 0), atom, bond.getOrder(), atomContainer.getConnectedBondsCount(atom), 0);
354
sphereNodes.add(tempNode);
356
sphereNodesWithAtoms.add(atom);
358
// rootNode.childs.addElement(tempNode);
359
atom.setFlag(CDKConstants.VISITED, true);
360
} catch (Exception exc)
362
throw new CDKException("Error in HOSECodeGenerator->breadthFirstSearch.", exc);
365
Collections.sort(sphereNodes,new TreeNodeComparator());
366
nextSphere(sphereNodes);
370
* The actual recursion method for our breadth first search Each node in
371
* sphereNodes is inspected for its decendants which are then stored in
372
* nextSphereNodes, which again is passed to the next recursion level of
375
*@param sphereNodes The sphereNodes to be inspected
376
*@exception org.openscience.cdk.exception.CDKException If something goes wrong
378
private void nextSphere(List sphereNodes) throws org.openscience.cdk.exception.CDKException
380
spheres[sphere] = sphereNodes;
381
if(spheresWithAtoms!=null)
382
spheresWithAtoms[sphere] = sphereNodesWithAtoms;
384
* From here we start assembling the next sphere
388
java.util.List conAtoms = null;
389
TreeNode treeNode = null;
390
nextSphereNodes = new ArrayList();
391
org.openscience.cdk.interfaces.IBond bond = null;
392
for (int i = 0; i < sphereNodes.size(); i++)
394
treeNode = (TreeNode) sphereNodes.get(i);
395
if (!("&;#:,".indexOf(treeNode.symbol) >= 0))
397
node = treeNode.atom;
398
if(node.getSymbol().equals("H"))
401
conAtoms = atomContainer.getConnectedAtomsList(node);
402
if (conAtoms.size() == 1){
403
nextSphereNodes.add(new TreeNode(",", treeNode, null, 0, 0, treeNode.score));
405
for (int j = 0; j < conAtoms.size(); j++)
407
toNode = (IAtom)conAtoms.get(j);
408
if (toNode != treeNode.source.atom)
410
bond = atomContainer.getBond(node, toNode);
411
if (bond.getFlag(CDKConstants.ISAROMATIC))
413
nextSphereNodes.add(new TreeNode(toNode.getSymbol(), treeNode, toNode, 4, atomContainer.getConnectedBondsCount(toNode), treeNode.score));
416
nextSphereNodes.add(new TreeNode(toNode.getSymbol(), treeNode, toNode, bond.getOrder(), atomContainer.getConnectedBondsCount(toNode), treeNode.score));
423
Collections.sort(nextSphereNodes,new TreeNodeComparator());
424
if (sphere < maxSphere)
427
nextSphere(nextSphereNodes);
431
public String makeBremserCompliant(String code)
433
int sepIndex = code.indexOf(";");
436
code = code.substring(sepIndex + 1, code.length());
442
* After recursivly having established the spheres and assigning each node an
443
* appropriate score, we now generate the complete HOSE code.
445
*@exception org.openscience.cdk.exception.CDKException Thrown if something goes wrong
447
private void createCode() throws org.openscience.cdk.exception.CDKException
449
List sphereNodes = null;
451
for (int f = 0; f < atomContainer.getAtomCount(); f++)
453
atomContainer.getAtom(f).setFlag(CDKConstants.VISITED, false);
456
for (int f = 0; f < maxSphere; f++)
458
sphereNodes = spheres[maxSphere - f];
459
for (int g = 0; g < sphereNodes.size(); g++)
461
tn = (TreeNode) sphereNodes.get(g);
462
if (tn.source != null)
464
tn.source.ranking += tn.degree;
470
for (int f = 0; f < maxSphere; f++)
472
sphereNodes = spheres[f];
473
calculateNodeScores(sphereNodes);
474
sortNodesByScore(sphereNodes);
477
for (int f = 0; f < maxSphere; f++)
479
sphereNodes = spheres[f];
480
for (int g = 0; g < sphereNodes.size() ; g++)
482
tn = (TreeNode) sphereNodes.get(g);
483
tn.score += tn.ranking;
485
sortNodesByScore(sphereNodes);
487
for (int f = 0; f < maxSphere; f++)
489
sphereNodes = spheres[f];
490
for (int g = 0; g < sphereNodes.size() ; g++)
492
tn = (TreeNode) sphereNodes.get(g);
493
String localscore=tn.score+"";
494
while(localscore.length()<6){
495
localscore="0"+localscore;
497
tn.stringscore=tn.source.stringscore+""+localscore;
499
sortNodesByScore(sphereNodes);
501
HOSECode.append(centerCode);
502
for (int f = 0; f < maxSphere; f++)
505
sphereNodes = spheres[f];
506
String s = getSphereCode(sphereNodes);
512
* Generates the string code for a given sphere
514
*@param sphereNodes A vector of TreeNodes for which a string code is to be generated
515
*@return The SphereCode value
516
*@exception org.openscience.cdk.exception.CDKException Thrown if something goes wrong
518
private String getSphereCode(List sphereNodes) throws org.openscience.cdk.exception.CDKException
520
if (sphereNodes == null || sphereNodes.size() < 1)
522
return sphereDelimiters[sphere - 1];
524
TreeNode treeNode = null;
525
StringBuffer code = new StringBuffer();
527
* append the tree node code to the HOSECode in
528
* their now determined order, using commas to
529
* separate nodes from different branches
531
IAtom branch = ((TreeNode) (((TreeNode) sphereNodes.get(0)).source)).atom;
532
StringBuffer tempCode = null;
533
for (int i = 0; i < sphereNodes.size(); i++)
535
treeNode = (TreeNode) sphereNodes.get(i);
536
tempCode = new StringBuffer();
537
if (!treeNode.source.stopper && treeNode.source.atom != branch)
539
branch = treeNode.source.atom;
543
if (!treeNode.source.stopper && treeNode.source.atom == branch)
545
if (treeNode.bondType <= 4)
547
tempCode.append(bondSymbols[(int) treeNode.bondType]);
550
throw new CDKException("Unknown bond type");
552
if (treeNode.atom != null && !treeNode.atom.getFlag(CDKConstants.VISITED))
554
tempCode.append(getElementSymbol(treeNode.symbol));
556
else if (treeNode.atom != null && treeNode.atom.getFlag(CDKConstants.VISITED))
558
tempCode.append("&");
559
treeNode.stopper = true;
561
code.append(tempCode+createChargeCode(treeNode.atom));
562
treeNode.hSymbol = tempCode.toString();
564
if (treeNode.atom != null) treeNode.atom.setFlag(CDKConstants.VISITED, true);
565
if (treeNode.source.stopper) treeNode.stopper = true;
567
code.append(sphereDelimiters[sphere - 1]);
568
return code.toString();
573
* Gets the element rank for a given element symbol as given in Bremser's
576
*@param symbol The element symbol for which the rank is to be determined
577
*@return The element rank
579
private double getElementRank(String symbol)
581
for (int f = 0; f < rankedSymbols.length; f++)
583
if (rankedSymbols[f].equals(symbol))
585
return symbolRankings[f];
589
IIsotope isotope = IsotopeFactory.getInstance(new ChemObject().getBuilder()).getMajorIsotope(symbol);
590
return ((double) 800000 - isotope.getMassNumber());
591
} catch (Exception exception) {
592
System.err.println("Could not find major isotope for this element!!! : " + symbol);
593
System.err.println("Because of this error: " + exception.getMessage());
595
return (double)800000;
599
* Returns the Bremser-compatible symbols for a given element. Silicon, for
600
* example, is actually "Q". :-)
602
*@param sym The element symbol to be converted
603
*@return The converted symbol
605
private String getElementSymbol(String sym)
607
if (sym.equals("Si"))
611
if (sym.equals("Cl"))
615
if (sym.equals("Br"))
628
* Determines the ranking score for each node, allowing for a sorting of nodes
631
*@param sphereNodes The nodes for which the score is to be calculated.
632
*@exception org.openscience.cdk.exception.CDKException Thrown if something goes wrong.
634
private void calculateNodeScores(List sphereNodes) throws org.openscience.cdk.exception.CDKException
636
TreeNode treeNode = null;
637
for (int i = 0; i < sphereNodes.size(); i++)
639
treeNode = (TreeNode) sphereNodes.get(i);
640
treeNode.score += getElementRank(treeNode.symbol);
641
if (treeNode.bondType <= 4)
643
treeNode.score += bondRankings[(int) treeNode.bondType];
646
throw new CDKException("Unknown bond type encountered in HOSECodeGenerator");
652
* Sorts the nodes (atoms) in the sphereNode vector according to their score
653
* This is used for the essential ranking of nodes in HOSE code sphere
655
*@param sphereNodes A vector with sphere nodes to be sorted.
657
private void sortNodesByScore(List sphereNodes)
661
if (sphereNodes.size() == 0) return;
663
* Now we sort by score
668
for (int i = 0; i < sphereNodes.size() - 1; i++)
670
if (((TreeNode) sphereNodes.get(i + 1)).stringscore.compareTo(((TreeNode) sphereNodes.get(i)).stringscore)>0)
672
obj = sphereNodes.get(i + 1);
673
sphereNodes.remove(i + 1);
674
sphereNodes.add(i, obj);
679
/* Having sorted a sphere, we lable the nodes with their sort order */
680
TreeNode temp = null;
681
for (int i = 0; i < sphereNodes.size(); i++)
683
temp = ((TreeNode) sphereNodes.get(i));
684
temp.sortOrder = sphereNodes.size() - i;
689
* If we use less than four sphere, this fills up the code with the missing
690
* delimiters such that we are compatible with Bremser's HOSE code table.
692
private void fillUpSphereDelimiters()
694
logger.debug("Sphere: " + sphere);
695
for (int f = sphere; f < 4; f++)
697
HOSECode.append(sphereDelimiters[f]);
702
class TreeNodeComparator implements Comparator {
704
*The compare method, compares by canonical label of atoms
706
* @param obj1 The first TreeNode
707
* @param obj2 The second TreeNode
710
public int compare(Object obj1, Object obj2) {
711
if(obj1==null || obj2==null || ((TreeNode) obj1).getAtom()==null || ((TreeNode) obj2).getAtom()==null)
713
Long label1 = (Long)((TreeNode) obj1).getAtom().getProperty("CanonicalLable");
714
Long label2 = (Long)((TreeNode) obj2).getAtom().getProperty("CanonicalLable");
715
if(label1==null || label2==null)
717
if (label1.intValue() < label2.intValue()) {
720
if (label1.intValue() > label2.intValue()) {
728
* Helper class for storing the properties of a node in our breadth first
732
* @cdk.created 2002-11-16
745
String hSymbol = null;
746
boolean stopper = false;
747
String stringscore="";
750
* Constructor for the TreeNode object
752
*@param symbol The Element symbol of the node
753
*@param source The preceding node for this node
754
*@param atom The cdk atom object belonging to this node
755
*@param bondType The bond type by which this node was connect to its
757
*@param score The score used to rank this node within its sphere.
758
*@param degree Description of the Parameter
760
TreeNode(String symbol, TreeNode source, IAtom atom, double bondType, int degree, long score)
762
this.symbol = symbol;
763
this.source = source;
765
this.degree = degree;
767
this.bondType = bondType;
770
childs = new ArrayList();
773
public IAtom getAtom(){
779
* A TreeNode is equal to another TreeNode if it
780
* stands for the same atom object
782
*@param o The object tht we compare this TreeNode to
783
*@return True, if the this TreeNode's atom object equals the one of the other TreeNode
785
public boolean equals(Object o)
789
if (this.atom == ((TreeNode) o).atom)
796
/* we do nothing here because anything
797
that could seriously happen here is the we
798
got something which is not a TreeNode and then got
799
a class cast exception. Thus we can just wait until the
800
end of the method returns a "false" */
805
public String toString()
810
s += (atomContainer.getAtomNumber(atom) + 1);
813
s += "; r=" + ranking;
814
s += "; d = " + degree;
818
return exc.toString();
825
public List getNodesInSphere(int sphereNumber){
826
sphereNodes = spheres[sphereNumber-1];
827
List atoms=new ArrayList();
828
for (int g = 0; g < sphereNodes.size() ; g++) {
829
atoms.add(((TreeNode) sphereNodes.get(g)).atom);