3
* $Date: 2007-10-14 20:29:40 +0200 (Sun, 14 Oct 2007) $
6
* Copyright (C) 2004-2007 Rajarshi Guha <rajarshi@users.sourceforge.net>
8
* Contact: cdk-devel@lists.sourceforge.net
10
* This program is free software; you can redistribute it and/or
11
* modify it under the terms of the GNU Lesser General Public License
12
* as published by the Free Software Foundation; either version 2.1
13
* of the License, or (at your option) any later version.
15
* This program is distributed in the hope that it will be useful,
16
* but WITHOUT ANY WARRANTY; without even the implied warranty of
17
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
* GNU Lesser General Public License for more details.
20
* You should have received a copy of the GNU Lesser General Public License
21
* along with this program; if not, write to the Free Software
22
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24
package org.openscience.cdk.qsar.descriptors.molecular;
26
import Jama.EigenvalueDecomposition;
28
import org.openscience.cdk.CDKConstants;
29
import org.openscience.cdk.Molecule;
30
import org.openscience.cdk.aromaticity.HueckelAromaticityDetector;
31
import org.openscience.cdk.charges.GasteigerMarsiliPartialCharges;
32
import org.openscience.cdk.charges.Polarizability;
33
import org.openscience.cdk.config.IsotopeFactory;
34
import org.openscience.cdk.exception.CDKException;
35
import org.openscience.cdk.interfaces.IAtomContainer;
36
import org.openscience.cdk.qsar.DescriptorSpecification;
37
import org.openscience.cdk.qsar.DescriptorValue;
38
import org.openscience.cdk.qsar.IMolecularDescriptor;
39
import org.openscience.cdk.qsar.result.DoubleArrayResult;
40
import org.openscience.cdk.qsar.result.IDescriptorResult;
41
import org.openscience.cdk.tools.HydrogenAdder;
42
import org.openscience.cdk.tools.LoggingTool;
43
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
46
* Eigenvalue based descriptor noted for its utility in chemical diversity.
47
* Described by Pearlman et al. {@cdk.cite PEA99}.
49
* <p>The descriptor is based on a weighted version of the Burden matrix {@cdk.cite BUR89, BUR97}
50
* which takes into account both the connectivity as well as atomic
51
* properties of a molecule. The weights are a variety of atom properties placed along the
52
* diagonal of the Burden matrix. Currently three weighting schemes are employed
55
* <li>partial charge (Gasteiger Marsilli)
56
* <li>polarizability {@cdk.cite KJ81}
58
* <p>By default, the descriptor will return the highest and lowest eigenvalues for the three
59
* classes of descriptor in a single ArrayList (in the order shown above). However it is also
60
* possible to supply a parameter list indicating how many of the highest and lowest eigenvalues
61
* (for each class of descriptor) are required.
63
* <p>The descriptor works with the hydrogen depleted molecule and thus the maximum number
64
* of eigenvalues calculated for any class of BCUT descriptor is equal to the number
65
* of heavy atoms present.
66
* <p>This descriptor uses these parameters:
71
* <td>Description</td>
76
* <td>The number of highest eigenvalue</td>
81
* <td>The number of lowest eigenvalue</td>
85
* Returns an array of values in the following order
88
* <li>BCUTw-1l, BCUTw-2l ... - <i>nhigh</i> lowest atom weighted BCUTS
89
* <li>BCUTw-1h, BCUTw-2h ... - <i>nlow</i> highest atom weighted BCUTS
90
* <li>BCUTc-1l, BCUTc-2l ... - <i>nhigh</i> lowest partial charge weighted BCUTS
91
* <li>BCUTc-1h, BCUTc-2h ... - <i>nlow</i> highest partial charge weighted BCUTS
92
* <li>BCUTp-1l, BCUTp-2l ... - <i>nhigh</i> lowest polarizability weighted BCUTS
93
* <li>BCUTp-1h, BCUTp-2h ... - <i>nlow</i> highest polarizability weighted BCUTS
95
* @author Rajarshi Guha
96
* @cdk.created 2004-11-30
97
* @cdk.builddepends Jama-1.0.1.jar
98
* @cdk.depends Jama-1.0.1.jar
100
* @cdk.set qsar-descriptors
101
* @cdk.dictref qsar-descriptors:BCUT
103
* @cdk.keyword descriptor
105
public class BCUTDescriptor implements IMolecularDescriptor {
106
private LoggingTool logger;
108
// the number of negative & positive eigenvalues
109
// to return for each class of BCUT descriptor
112
private boolean checkAromaticity = true;
114
public BCUTDescriptor() {
115
logger = new LoggingTool(this);
117
// set the default number of BCUT's
122
public DescriptorSpecification getSpecification() {
123
return new DescriptorSpecification(
124
"http://www.blueobelisk.org/ontologies/chemoinformatics-algorithms/#BCUT",
125
this.getClass().getName(),
126
"$Id: BCUTDescriptor.java 9054 2007-10-14 18:29:40Z egonw $",
127
"The Chemistry Development Kit");
131
* Sets the parameters attribute of the BCUTDescriptor object.
133
* @param params The new parameter values. This descriptor takes 3 parameters: number of highest
134
* eigenvalues and number of lowest eigenvalues. If 0 is specified for either (the default)
135
* then all calculated eigenvalues are returned. The third parameter checkAromaticity is a boolean.
136
* If checkAromaticity is true, the method check the aromaticity, if false, means that the aromaticity has
137
* already been checked.
138
* @throws CDKException if the parameters are of the wrong type
139
* @see #getParameters
141
public void setParameters(Object[] params) throws CDKException {
142
// we expect 3 parameters
143
if (params.length != 3) {
144
throw new CDKException("BCUTDescriptor requires 3 parameters");
146
if (!(params[0] instanceof Integer) || !(params[1] instanceof Integer)) {
147
throw new CDKException("Parameters must be of type Integer");
148
} else if (!(params[2] instanceof Boolean)) {
149
throw new CDKException("The third parameter must be of type Boolean");
151
// ok, all should be fine
153
this.nhigh = ((Integer) params[0]).intValue();
154
this.nlow = ((Integer) params[1]).intValue();
155
this.checkAromaticity = ((Boolean) params[2]).booleanValue();
157
if (this.nhigh < 0 || this.nlow < 0) {
158
throw new CDKException("Number of eigenvalues to return must be positive or 0");
163
* Gets the parameters attribute of the BCUTDescriptor object.
165
* @return Three element array of Integer and one boolean representing number of highest and lowest eigenvalues and the checkAromaticity flag
166
* to return respectively
167
* @see #setParameters
169
public Object[] getParameters() {
170
Object params[] = new Object[3];
171
params[0] = new Integer(this.nhigh);
172
params[1] = new Integer(this.nlow);
173
params[2] = Boolean.valueOf(this.checkAromaticity);
178
* Gets the parameterNames attribute of the BCUTDescriptor object.
180
* @return The parameterNames value
182
public String[] getParameterNames() {
183
String[] params = new String[3];
186
params[2] = "checkAromaticity";
192
* Gets the parameterType attribute of the BCUTDescriptor object.
194
* @param name Description of the Parameter (can be either 'nhigh' or 'nlow' or checkAromaticity)
195
* @return The parameterType value
197
public Object getParameterType(String name) {
198
Object object = null;
199
if (name.equals("nhigh")) object = new Integer(1);
200
if (name.equals("nlow")) object = new Integer(1);
201
if (name.equals("checkAromaticity")) object = new Integer(1);
206
static private class BurdenMatrix {
208
static double[][] evalMatrix(IAtomContainer atomContainer, double[] vsd) {
209
IAtomContainer local = AtomContainerManipulator.removeHydrogens(atomContainer);
211
int natom = local.getAtomCount();
212
double[][] matrix = new double[natom][natom];
214
/* set the off diagonal entries */
215
for (int i = 0; i < natom - 1; i++) {
216
for (int j = i + 1; j < natom; j++) {
217
for (int k = 0; k < local.getBondCount(); k++) {
218
org.openscience.cdk.interfaces.IBond bond = local.getBond(k);
219
if (bond.contains(local.getAtom(i)) && bond.contains(local.getAtom(j))) {
220
if (bond.getOrder() == CDKConstants.BONDORDER_SINGLE) matrix[i][j] = 0.1;
221
else if (bond.getOrder() == CDKConstants.BONDORDER_DOUBLE) matrix[i][j] = 0.2;
222
else if (bond.getOrder() == CDKConstants.BONDORDER_TRIPLE) matrix[i][j] = 0.3;
223
else if (bond.getOrder() == CDKConstants.BONDORDER_AROMATIC) matrix[i][j] = 0.15;
225
if (local.getConnectedBondsCount(i) == 1 || local.getConnectedBondsCount(j) == 1) {
226
matrix[i][j] += 0.01;
228
matrix[j][i] = matrix[i][j];
230
matrix[i][j] = 0.001;
231
matrix[j][i] = 0.001;
237
/* set the diagonal entries */
238
for (int i = 0; i < natom; i++) {
239
if (vsd != null) matrix[i][i] = vsd[i];
240
else matrix[i][i] = 0.0;
247
* Calculates the three classes of BCUT descriptors.
249
* @param container Parameter is the atom container.
250
* @return An ArrayList containing the descriptors. The default is to return
251
* all calculated eigenvalues of the Burden matrices in the order described
252
* above. If a parameter list was supplied, then only the specified number
253
* of highest and lowest eigenvalues (for each class of BCUT) will be returned.
254
* @throws CDKException if the wrong number of eigenvalues are requested (negative or more than the number
257
public DescriptorValue calculate(IAtomContainer container) throws CDKException {
261
molecule = (Molecule) container.clone();
262
} catch (CloneNotSupportedException e) {
263
logger.debug("Error during clone");
264
throw new CDKException("Error occured during clone "+e);
267
// add H's in case they're not present
268
HydrogenAdder hydrogenAdder = new HydrogenAdder();
270
hydrogenAdder.addExplicitHydrogensToSatisfyValency(molecule);
271
} catch (Exception e) {
272
throw new CDKException("Could not add hydrogens: " + e.getMessage(), e);
275
// do aromaticity detecttion for calculating polarizability later on
276
if (this.checkAromaticity) {
277
HueckelAromaticityDetector.detectAromaticity(molecule);
280
// find number of heavy atoms
282
for (int i = 0; i < molecule.getAtomCount(); i++) {
283
if (!molecule.getAtom(i).getSymbol().equals("H")) nheavy++;
286
if (this.nhigh > nheavy || this.nlow > nheavy) {
287
throw new CDKException("Number of negative or positive eigenvalues cannot be more than number of heavy atoms");
290
double[] diagvalue = new double[nheavy];
292
// get atomic mass weighted BCUT
295
for (int i = 0; i < molecule.getAtomCount(); i++) {
296
if (molecule.getAtom(i).getSymbol().equals("H")) continue;
297
diagvalue[counter] = IsotopeFactory.getInstance(molecule.getBuilder()).
298
getMajorIsotope(molecule.getAtom(i).getSymbol()).getExactMass();
301
} catch (Exception e) {
302
throw new CDKException("Could not calculate weight: " + e.getMessage(), e);
304
double[][] burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
305
Matrix matrix = new Matrix(burdenMatrix);
306
EigenvalueDecomposition eigenDecomposition = new EigenvalueDecomposition(matrix);
307
double[] eval1 = eigenDecomposition.getRealEigenvalues();
309
// get charge weighted BCUT
310
GasteigerMarsiliPartialCharges peoe;
312
peoe = new GasteigerMarsiliPartialCharges();
313
peoe.assignGasteigerMarsiliSigmaPartialCharges(molecule, true);
314
} catch (Exception e) {
315
throw new CDKException("Could not calculate partial charges: " + e.getMessage(), e);
318
for (int i = 0; i < molecule.getAtomCount(); i++) {
319
if (molecule.getAtom(i).getSymbol().equals("H")) continue;
320
diagvalue[counter] = molecule.getAtom(i).getCharge();
323
burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
324
matrix = new Matrix(burdenMatrix);
325
eigenDecomposition = new EigenvalueDecomposition(matrix);
326
double[] eval2 = eigenDecomposition.getRealEigenvalues();
328
// get polarizability weighted BCUT
329
Polarizability pol = new Polarizability();
331
for (int i = 0; i < molecule.getAtomCount(); i++) {
332
if (molecule.getAtom(i).getSymbol().equals("H")) continue;
333
diagvalue[counter] = pol.calculateGHEffectiveAtomPolarizability(molecule, molecule.getAtom(i), 1000);
336
burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
337
matrix = new Matrix(burdenMatrix);
338
eigenDecomposition = new EigenvalueDecomposition(matrix);
339
double[] eval3 = eigenDecomposition.getRealEigenvalues();
341
DoubleArrayResult retval = new DoubleArrayResult(eval1.length + eval2.length + eval3.length);
344
String[] suffix = {"w", "c", "p"};
346
if (nhigh == 0 || nlow == 0) {
347
// return all calculated eigenvalues
348
for (int i = 0; i < eval1.length; i++) retval.add(eval1[i]);
349
for (int i = 0; i < eval2.length; i++) retval.add(eval2[i]);
350
for (int i = 0; i < eval3.length; i++) retval.add(eval3[i]);
352
names = new String[retval.length()];
353
for (int j = 0; j < suffix.length; j++) {
354
for (int i = 0; i < eval1.length; i++) {
355
names[i] = "BCUT" + suffix[j] + "-" + (i + 1) + "l";
357
for (int i = 0; i < eval1.length; i++) {
358
names[i] = "BCUT" + suffix[j] + "-" + (i + 1) + "h";
362
// return only the n highest & lowest eigenvalues
363
for (int i = 0; i < nlow; i++) retval.add(eval1[i]);
364
for (int i = 0; i < nhigh; i++) retval.add(eval1[eval1.length - i - 1]);
366
for (int i = 0; i < nlow; i++) retval.add(eval2[i]);
367
for (int i = 0; i < nhigh; i++) retval.add(eval2[eval2.length - i - 1]);
369
for (int i = 0; i < nlow; i++) retval.add(eval3[i]);
370
for (int i = 0; i < nhigh; i++) retval.add(eval3[eval3.length - i - 1]);
372
names = new String[3 * nhigh + 3 * nlow];
374
for (int j = 0; j < suffix.length; j++) {
375
for (int i = 0; i < nhigh; i++) {
376
names[counter++] = "BCUT" + suffix[j] + "-" + (i + 1) + "l";
378
for (int i = 0; i < nlow; i++) {
379
names[counter++] = "BCUT" + suffix[j] + "-" + (i + 1) + "h";
385
return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(), retval, names);
389
* Returns the specific type of the DescriptorResult object.
391
* The return value from this method really indicates what type of result will
392
* be obtained from the {@link org.openscience.cdk.qsar.DescriptorValue} object. Note that the same result
393
* can be achieved by interrogating the {@link org.openscience.cdk.qsar.DescriptorValue} object; this method
394
* allows you to do the same thing, without actually calculating the descriptor.
396
* @return an object that implements the {@link org.openscience.cdk.qsar.result.IDescriptorResult} interface indicating
397
* the actual type of values returned by the descriptor in the {@link org.openscience.cdk.qsar.DescriptorValue} object
399
public IDescriptorResult getDescriptorResultType() {
400
return new DoubleArrayResult();