3
* $Date: 2007-01-04 18:46:10 +0100 (Thu, 04 Jan 2007) $
6
* Copyright (C) 2002-2007 The Chemistry Development Kit (CDK) project
8
* Contact: cdk-devel@lists.sf.net
10
* This library is free software; you can redistribute it and/or
11
* modify it under the terms of the GNU Lesser General Public
12
* License as published by the Free Software Foundation; either
13
* version 2.1 of the License, or (at your option) any later version.
15
* This library is distributed in the hope that it will be useful,
16
* but WITHOUT ANY WARRANTY; without even the implied warranty of
17
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18
* Lesser General Public License for more details.
20
* You should have received a copy of the GNU Lesser General Public
21
* License along with this library; if not, write to the Free Software
22
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24
package org.openscience.cdk.io.inchi;
26
import java.util.regex.Matcher;
27
import java.util.regex.Pattern;
29
import org.openscience.cdk.interfaces.IAtom;
30
import org.openscience.cdk.interfaces.IAtomContainer;
31
import org.openscience.cdk.interfaces.IBond;
32
import org.openscience.cdk.tools.LoggingTool;
35
* Tool to help process INChI 1.12beta content.
37
* @cdk.module experimental
39
public class INChIContentProcessorTool {
41
private LoggingTool logger;
43
public INChIContentProcessorTool() {
44
logger = new LoggingTool(this);
48
* Processes the content from the formula field of the INChI.
49
* Typical values look like C6H6, from INChI=1.12Beta/C6H6/c1-2-4-6-5-3-1/h1-6H.
51
public IAtomContainer processFormula(IAtomContainer parsedContent, String atomsEncoding) {
52
logger.debug("Parsing atom data: ", atomsEncoding);
54
Pattern pattern = Pattern.compile("([A-Z][a-z]?)(\\d+)?(.*)");
55
String remainder = atomsEncoding;
56
while (remainder.length() > 0) {
57
logger.debug("Remaining: ", remainder);
58
Matcher matcher = pattern.matcher(remainder);
59
if (matcher.matches()) {
60
String symbol = matcher.group(1);
61
logger.debug("Atom symbol: ", symbol);
62
if (symbol.equals("H")) {
63
// don't add explicit hydrogens
65
String occurenceStr = matcher.group(2);
67
if (occurenceStr != null) {
68
occurence = Integer.parseInt(occurenceStr);
70
logger.debug(" occurence: ", occurence);
71
for (int i=1; i<=occurence; i++) {
72
parsedContent.addAtom(parsedContent.getBuilder().newAtom(symbol));
75
remainder = matcher.group(3);
76
if (remainder == null) remainder = "";
77
logger.debug(" Remaining: ", remainder);
79
logger.error("No match found!");
82
logger.debug("NO atoms: ", parsedContent.getAtomCount());
88
* Processes the content from the connections field of the INChI.
89
* Typical values look like 1-2-4-6-5-3-1, from INChI=1.12Beta/C6H6/c1-2-4-6-5-3-1/h1-6H.
91
* @param bondsEncoding the content of the INChI connections field
92
* @param container the atomContainer parsed from the formula field
93
* @param source the atom to build the path upon. If -1, then start new path
95
* @see #processFormula
97
public void processConnections(String bondsEncoding,
98
IAtomContainer container, int source){
99
logger.debug("Parsing bond data: ", bondsEncoding);
101
IBond bondToAdd = null;
102
/* Fixme: treatment of branching is too limited! */
103
String remainder = bondsEncoding;
104
while (remainder.length() > 0) {
105
logger.debug("Bond part: ", remainder);
106
if (remainder.charAt(0) == '(') {
107
String branch = chopBranch(remainder);
108
processConnections(branch, container, source);
109
if (branch.length()+2 <= remainder.length()) {
110
remainder = remainder.substring(branch.length()+2);
115
Pattern pattern = Pattern.compile("^(\\d+)-?(.*)");
116
Matcher matcher = pattern.matcher(remainder);
117
if (matcher.matches()) {
118
String targetStr = matcher.group(1);
119
int target = Integer.parseInt(targetStr);
120
logger.debug("Source atom: ", source);
121
logger.debug("Target atom: ", targetStr);
122
IAtom targetAtom = container.getAtom(target-1);
124
IAtom sourceAtom = container.getAtom(source-1);
125
bondToAdd = container.getBuilder().newBond(sourceAtom, targetAtom, 1.0);
126
container.addBond(bondToAdd);
128
remainder = matcher.group(2);
130
logger.debug(" remainder: ", remainder);
132
logger.error("Could not get next bond info part");
140
* Extracts the first full branch. It extracts everything between the first
141
* '(' and the corresponding ')' char.
143
private String chopBranch(String remainder) {
144
boolean doChop = false;
146
StringBuffer choppedString = new StringBuffer();
147
for (int i=0; i<remainder.length(); i++) {
148
char currentChar = remainder.charAt(i);
149
if (currentChar == '(') {
150
if (doChop) choppedString.append(currentChar);
153
} else if (currentChar == ')') {
155
if (branchLevel == 0) doChop = false;
156
if (doChop) choppedString.append(currentChar);
158
choppedString.append(currentChar);
161
return choppedString.toString();