1
/* $Revision: 7636 $ $Author: egonw $ $Date: 2007-01-04 18:46:10 +0100 (Thu, 04 Jan 2007) $
3
* Copyright (C) 2004-2007 Egon Willighagen <egonw@users.sf.net>
5
* Contact: cdk-devel@lists.sourceforge.net
7
* This program is free software; you can redistribute it and/or
8
* modify it under the terms of the GNU Lesser General Public License
9
* as published by the Free Software Foundation; either version 2.1
10
* of the License, or (at your option) any later version.
11
* All we ask is that proper credit is given for our work, which includes
12
* - but is not limited to - adding the above copyright notice to the beginning
13
* of your source code files, and to any copyright notice that you may distribute
14
* with programs based on this work.
16
* This program is distributed in the hope that it will be useful,
17
* but WITHOUT ANY WARRANTY; without even the implied warranty of
18
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
* GNU Lesser General Public License for more details.
21
* You should have received a copy of the GNU Lesser General Public License
22
* along with this program; if not, write to the Free Software
23
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
26
package org.openscience.cdk.io;
28
import java.io.BufferedReader;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.io.InputStreamReader;
32
import java.io.Reader;
33
import java.io.StringReader;
34
import java.util.Hashtable;
35
import java.util.Iterator;
36
import java.util.StringTokenizer;
37
import java.util.regex.Matcher;
38
import java.util.regex.Pattern;
40
import javax.vecmath.Point3d;
41
import javax.vecmath.Vector3d;
43
import org.openscience.cdk.exception.CDKException;
44
import org.openscience.cdk.graph.rebond.RebondTool;
45
import org.openscience.cdk.interfaces.IAtom;
46
import org.openscience.cdk.interfaces.IAtomContainer;
47
import org.openscience.cdk.interfaces.IBond;
48
import org.openscience.cdk.interfaces.IChemFile;
49
import org.openscience.cdk.interfaces.IChemModel;
50
import org.openscience.cdk.interfaces.IChemObject;
51
import org.openscience.cdk.interfaces.IChemObjectBuilder;
52
import org.openscience.cdk.interfaces.IChemSequence;
53
import org.openscience.cdk.interfaces.ICrystal;
54
import org.openscience.cdk.io.formats.IResourceFormat;
55
import org.openscience.cdk.io.formats.PMPFormat;
56
import org.openscience.cdk.tools.LoggingTool;
59
* Reads frames from PMP formatted input.
60
* Both compilation and use of this class requires Java 1.4.
64
* @cdk.keyword file format, Polymorph Predictor (tm)
66
* @author E.L. Willighagen
67
* @cdk.require java1.4+
69
public class PMPReader extends DefaultChemObjectReader {
71
// Property key under which the raw "ZOrder" field of an atom is stored on the atom object.
private static final String PMP_ZORDER = "ZOrder";
72
// Property key under which the raw "Id" field of an atom is stored on the atom object.
private static final String PMP_ID = "Id";
74
// Buffered source of the PMP text; every line is pulled from here.
private BufferedReader input;
76
private LoggingTool logger;
78
/* Keep a copy of the PMP model */
79
private IAtomContainer modelStructure;
80
// Object currently being populated by the object commands of the model section.
private IChemObject chemObject;
81
/* Keep an index of PMP id -> AtomContainer id */
82
// PMP object id -> index of the atom inside modelStructure (atom count at insertion time)
private Hashtable atomids = new Hashtable();
83
// value of the atom's "Id" field -> PMP object id
private Hashtable atomGivenIds = new Hashtable();
84
// value of the atom's "ZOrder" field -> PMP object id
private Hashtable atomZOrders = new Hashtable();
85
// bondCounter value -> value of the bond's "Id" field
private Hashtable bondids = new Hashtable();
86
// bondCounter value -> value of the bond's "Atom1" field
private Hashtable bondAtomOnes = new Hashtable();
87
// bondCounter value -> value of the bond's "Atom2" field
private Hashtable bondAtomTwos = new Hashtable();
88
// bondCounter value -> bond order as a Double
private Hashtable bondOrders = new Hashtable();
90
/* Often used patterns */
93
// Matches the "<atomic number> <type>" payload of an atom "ACL" command.
Pattern atomTypePattern;
97
// Applied to every per-frame atom container after its atoms have been positioned.
private RebondTool rebonder;
100
* construct a new reader from a Reader type object
102
* @param input reader from which input is read
104
/**
 * Creates a PMP reader on top of the given character stream.
 * The stream is always wrapped in a new BufferedReader here (no
 * check for an already buffered reader is made), and the regular
 * expressions and the rebonding tool used during parsing are set up.
 *
 * @param input reader from which input is read
 */
public PMPReader(Reader input) {
105
this.input = new BufferedReader(input);
106
logger = new LoggingTool(this);
109
/* compile patterns */
110
// Object header: a line ending in "(<digits> <word>".
// Group 1 is used as the numeric object id, group 2 as the object type name.
objHeader = Pattern.compile(".*\\((\\d+)\\s(\\w+)$");
111
// Object command: a line ending in "(A <C|F|D|I|O> <name> <value>)", value optionally quoted.
// Group 1 is passed on as the format, group 2 as the command, group 3 as the field.
objCommand = Pattern.compile(".*\\(A\\s(C|F|D|I|O)\\s(\\w+)\\s+\"?(.*?)\"?\\)$");
112
// Atom type field: "<digits> <word>"; group 1 is parsed as atomic number, group 2 used as symbol.
atomTypePattern = Pattern.compile("^(\\d+)\\s+(\\w+)$");
114
// NOTE(review): the meaning of the three numeric arguments is defined by RebondTool
// and cannot be confirmed from this file.
rebonder = new RebondTool(2.0, 0.5, 0.5);
117
/**
 * Creates a PMP reader on top of a byte stream.
 * The bytes are decoded with the platform default charset, because
 * the stream is wrapped in a plain {@link InputStreamReader}.
 *
 * @param input stream from which the PMP text is read
 */
public PMPReader(InputStream input) {
    this(new InputStreamReader(input));
}
122
// NOTE(review): the declaration line that encloses this statement is not present in
// this view. An explicit this(...) call delegates to another constructor of the class,
// here handing it an empty character source.
this(new StringReader(""));
125
public IResourceFormat getFormat() {
126
return PMPFormat.getInstance();
129
public void setReader(Reader input) throws CDKException {
130
if (input instanceof BufferedReader) {
131
this.input = (BufferedReader)input;
133
this.input = new BufferedReader(input);
137
public void setReader(InputStream input) throws CDKException {
138
setReader(new InputStreamReader(input));
141
public boolean accepts(Class classObject) {
142
Class[] interfaces = classObject.getInterfaces();
143
for (int i=0; i<interfaces.length; i++) {
144
if (IChemFile.class.equals(interfaces[i])) return true;
150
* reads the content from a PMP input. It can only return a
151
* IChemObject of type ChemFile
153
* @param object class must be of type ChemFile
157
public IChemObject read(IChemObject object) throws CDKException {
158
if (object instanceof IChemFile) {
159
return (IChemObject)readChemFile((IChemFile)object);
161
throw new CDKException("Only supported is reading of ChemFile objects.");
165
// private procedures
167
private String readLine() throws IOException {
168
String line = input.readLine();
169
lineNumber = lineNumber + 1;
170
logger.debug("LINE (" + lineNumber + "): ", line);
175
* Private method that actually parses the input to read a ChemFile
178
* Each PMP frame is stored as a Crystal in a ChemModel. The PMP
179
* file is stored as a ChemSequence of ChemModels.
181
* @return A ChemFile containing the data parsed from input.
183
/*
 * Parses the PMP input into the given chemFile.
 *
 * The input is dispatched on section markers: "%%Header Start" (version
 * check), "%%Model Start" (atoms and bonds of the model structure) and
 * "%%Traj Start" (one crystal per frame, collected in a chem sequence that
 * is added to chemFile).
 *
 * NOTE(review): several lines of this method are not present in this view
 * (block closers, the places where the next line is fetched, the
 * declaration of Z, the statements between the visible ones in the catch
 * blocks and the final return). The comments below describe the visible
 * statements only.
 */
private IChemFile readChemFile(IChemFile chemFile) {
184
IChemSequence chemSequence = chemFile.getBuilder().newChemSequence();
185
IChemModel chemModel = chemFile.getBuilder().newChemModel();
186
ICrystal crystal = chemFile.getBuilder().newCrystal();
189
String line = readLine();
190
// NOTE(review): input.ready() is consulted before the line already read is handled,
// so a line read when no further input is ready is never processed.
while (input.ready() && line != null) {
191
if (line.startsWith("%%Header Start")) {
192
// parse Header section
193
while (input.ready() && line != null && !(line.startsWith("%%Header End"))) {
194
if (line.startsWith("%%Version Number")) {
195
// the version string is on the line following the marker
String version = readLine().trim();
196
if (!version.equals("3.00")) {
197
logger.error("The PMPReader only supports PMP files with version 3.00");
203
} else if (line.startsWith("%%Model Start")) {
204
// parse Model section
205
modelStructure = chemFile.getBuilder().newAtomContainer();
206
while (input.ready() && line != null && !(line.startsWith("%%Model End"))) {
207
Matcher objHeaderMatcher = objHeader.matcher(line);
208
if (objHeaderMatcher.matches()) {
209
// group 2 = object type name, group 1 = numeric object id
String object = objHeaderMatcher.group(2);
210
constructObject(chemFile.getBuilder(), object);
211
int id = Integer.parseInt(objHeaderMatcher.group(1));
212
// logger.debug(object + " id: " + id);
214
// consume the commands of this object until a line consisting of ")" only
while (input.ready() && line != null && !(line.trim().equals(")"))) {
215
// parse object command (or new object header)
216
Matcher objCommandMatcher = objCommand.matcher(line);
217
objHeaderMatcher = objHeader.matcher(line);
218
if (objHeaderMatcher.matches()) {
219
// ok, forget about nesting and hope for the best
// (a nested header simply replaces the current object and id)
220
object = objHeaderMatcher.group(2);
221
id = Integer.parseInt(objHeaderMatcher.group(1));
222
constructObject(chemFile.getBuilder(), object);
223
} else if (objCommandMatcher.matches()) {
224
// groups: 1 = format letter, 2 = command name, 3 = field value
String format = objCommandMatcher.group(1);
225
String command = objCommandMatcher.group(2);
226
String field = objCommandMatcher.group(3);
228
processModelCommand(object, command, format, field);
230
logger.warn("Skipping line: " + line);
234
if (chemObject instanceof IAtom) {
235
// remember where this atom ends up in modelStructure, keyed by its PMP object id
atomids.put(new Integer(id), new Integer(modelStructure.getAtomCount()));
236
// NOTE(review): new Integer((String) null) throws NumberFormatException, so an atom
// without a stored ZOrder or Id property fails on the next two statements.
atomZOrders.put(new Integer((String)chemObject.getProperty(PMP_ZORDER)), new Integer(id));
237
atomGivenIds.put(new Integer((String)chemObject.getProperty(PMP_ID)), new Integer(id));
238
modelStructure.addAtom((IAtom)chemObject);
239
// } else if (chemObject instanceof IBond) {
240
// bondids.put(new Integer(id), new Integer(molecule.getAtomCount()));
241
// molecule.addBond((IBond)chemObject);
243
logger.error("chemObject is not initialized or of bad class type");
245
// logger.debug(molecule.toString());
249
if (line.startsWith("%%Model End")) {
250
// during the Model Start, all bonds are cached as PMP files might
251
// define bonds *before* the involved atoms :(
252
// the next lines dump the cache into the atom container
254
// bondids.put(new Integer(id), new Integer(molecule.getAtomCount()));
255
// molecule.addBond((IBond)chemObject);
256
int bondsFound = bondids.size();
257
logger.debug("Found #bonds: ", bondsFound);
258
logger.debug("#atom ones: ", bondAtomOnes.size());
259
logger.debug("#atom twos: ", bondAtomTwos.size());
260
logger.debug("#orders: ", bondOrders.size());
261
// only bonds for which an "Id" command was seen are in bondids and get created
Iterator bonds = bondids.keySet().iterator();
262
while (bonds.hasNext()) {
263
Integer index = (Integer)bonds.next();
264
// a bond without a recorded order defaults to 1.0
double order = (bondOrders.get(index) != null ? ((Double)bondOrders.get(index)).doubleValue() : 1.0);
265
logger.debug("index: ", index);
266
logger.debug("ones: ", bondAtomOnes.get(index));
267
// resolve: cached Atom1 value -> atomids -> atom in modelStructure
IAtom atom1 = modelStructure.getAtom(
268
((Integer)atomids.get(
269
(Integer)bondAtomOnes.get(index)
272
// same lookup chain for the cached Atom2 value
IAtom atom2 = modelStructure.getAtom(
273
((Integer)atomids.get(
274
(Integer)bondAtomTwos.get(index)
277
IBond bond = modelStructure.getBuilder().newBond(atom1, atom2, order);
278
modelStructure.addBond(bond);
281
} else if (line.startsWith("%%Traj Start")) {
282
// a fresh sequence replaces the one created at the top of the method
chemSequence = chemFile.getBuilder().newChemSequence();
283
double energyFragment = 0.0;
284
double energyTotal = 0.0;
286
while (input.ready() && line != null && !(line.startsWith("%%Traj End"))) {
287
if (line.startsWith("%%Start Frame")) {
288
// every frame gets its own model and crystal
chemModel = chemFile.getBuilder().newChemModel();
289
crystal = chemFile.getBuilder().newCrystal();
290
while (input.ready() && line != null && !(line.startsWith("%%End Frame"))) {
291
// process frame data
292
if (line.startsWith("%%Atom Coords")) {
293
// calculate Z: as it is not explicitly given, try to derive it from the
294
// energy per fragment and the total energy
295
if (energyFragment != 0.0 && energyTotal != 0.0) {
296
Z = (int)Math.round(energyTotal/energyFragment);
297
logger.debug("Z derived from energies: ", Z);
299
// add atomC as atoms to crystal
300
int expatoms = modelStructure.getAtomCount();
301
// one container per copy of the model structure, Z copies in total
for (int molCount = 1; molCount<=Z; molCount++) {
302
IAtomContainer clone = modelStructure.getBuilder().newAtomContainer();
303
for (int i=0; i < expatoms; i++) {
305
IAtom a = clone.getBuilder().newAtom();
306
// the three space separated tokens of the line are parsed as doubles below
StringTokenizer st = new StringTokenizer(line, " ");
309
Double.parseDouble(st.nextToken()),
310
Double.parseDouble(st.nextToken()),
311
Double.parseDouble(st.nextToken())
314
a.setCovalentRadius(0.6);
315
// the (i+1)-th coordinate row is matched to the model atom whose "Id" field is i+1
IAtom modelAtom = modelStructure.getAtom(((Integer)atomids.get(atomGivenIds.get(new Integer(i+1)))).intValue());
316
a.setSymbol(modelAtom.getSymbol());
319
rebonder.rebond(clone);
322
} else if (line.startsWith("%%E/Frag")) {
323
// the value is on the line following the marker
line = readLine().trim();
324
energyFragment = Double.parseDouble(line);
325
} else if (line.startsWith("%%Tot E")) {
326
line = readLine().trim();
327
energyTotal = Double.parseDouble(line);
328
} else if (line.startsWith("%%Lat Vects")) {
331
// three vectors are parsed in turn (a, b, c), each from three space separated doubles
// NOTE(review): the statements that fetch the line for each vector are not visible here
st = new StringTokenizer(line, " ");
332
crystal.setA(new Vector3d(
333
Double.parseDouble(st.nextToken()),
334
Double.parseDouble(st.nextToken()),
335
Double.parseDouble(st.nextToken())
338
st = new StringTokenizer(line, " ");
339
crystal.setB(new Vector3d(
340
Double.parseDouble(st.nextToken()),
341
Double.parseDouble(st.nextToken()),
342
Double.parseDouble(st.nextToken())
345
st = new StringTokenizer(line, " ");
346
crystal.setC(new Vector3d(
347
Double.parseDouble(st.nextToken()),
348
Double.parseDouble(st.nextToken()),
349
Double.parseDouble(st.nextToken())
351
} else if (line.startsWith("%%Space Group")) {
352
line = readLine().trim();
353
/* standardize space group name.
354
See Crystal.setSpaceGroup() */
355
if ("P 21 21 21 (1)".equals(line)) {
356
crystal.setSpaceGroup("P 2_1 2_1 2_1");
358
// NOTE(review): presumably the fallback for every other space group name;
// the branch keyword itself is not visible in this view
crystal.setSpaceGroup("P1");
364
chemModel.setCrystal(crystal);
365
chemSequence.addChemModel(chemModel);
369
chemFile.addChemSequence(chemSequence);
376
} catch (IOException e) {
377
// only the exception message is logged here
logger.error("An IOException happened: ", e.getMessage());
380
} catch (CDKException e) {
381
logger.error("An CDKException happened: ", e.getMessage());
389
private void processModelCommand(String object, String command, String format, String field) {
390
logger.debug(object + "->" + command + " (" + format + "): " + field);
391
if ("Model".equals(object)) {
392
logger.warn("Unkown PMP Model command: " + command);
393
} else if ("Atom".equals(object)) {
394
if ("ACL".equals(command)) {
395
Matcher atomTypeMatcher = atomTypePattern.matcher(field);
396
if (atomTypeMatcher.matches()) {
397
int atomicnum = Integer.parseInt(atomTypeMatcher.group(1));
398
String type = atomTypeMatcher.group(2);
399
((IAtom)chemObject).setAtomicNumber(atomicnum);
400
((IAtom)chemObject).setSymbol(type);
402
logger.error("Incorrectly formated field value: " + field + ".");
404
} else if ("Charge".equals(command)) {
406
double charge = Double.parseDouble(field);
407
((IAtom)chemObject).setCharge(charge);
408
} catch (NumberFormatException e) {
409
logger.error("Incorrectly formated float field: " + field + ".");
411
} else if ("CMAPPINGS".equals(command)) {
412
} else if ("FFType".equals(command)) {
413
} else if ("Id".equals(command)) {
414
// ok, should take this into account too
415
chemObject.setProperty(PMP_ID, field);
416
} else if ("Mass".equals(command)) {
417
} else if ("XYZ".equals(command)) {
418
} else if ("ZOrder".equals(command)) {
419
// ok, should take this into account too
420
chemObject.setProperty(PMP_ZORDER, field);
422
logger.warn("Unkown PMP Atom command: " + command);
424
} else if ("Bond".equals(object)) {
425
if ("Atom1".equals(command)) {
426
int atomid = Integer.parseInt(field);
427
// this assumes that the atoms involved in this bond are
428
// already added, which seems the case in the PMP files
429
bondAtomOnes.put(new Integer(bondCounter), new Integer(atomid));
430
// IAtom a = molecule.getAtom(realatomid);
431
// ((IBond)chemObject).setAtomAt(a, 0);
432
} else if ("Atom2".equals(command)) {
433
int atomid = Integer.parseInt(field);
434
// this assumes that the atoms involved in this bond are
435
// already added, which seems the case in the PMP files
436
logger.debug("atomids: " + atomids);
437
logger.debug("atomid: " + atomid);
438
bondAtomTwos.put(new Integer(bondCounter), new Integer(atomid));
439
// IAtom a = molecule.getAtom(realatomid);
440
// ((IBond)chemObject).setAtomAt(a, 1);
441
} else if ("Order".equals(command)) {
442
double order = Double.parseDouble(field);
443
bondOrders.put(new Integer(bondCounter), new Double(order));
444
// ((IBond)chemObject).setOrder(order);
445
} else if ("Id".equals(command)) {
446
int bondid = Integer.parseInt(field);
447
bondids.put(new Integer(bondCounter), new Integer(bondid));
448
} else if ("Label".equals(command)) {
449
} else if ("3DGridOrigin".equals(command)) {
450
} else if ("3DGridMatrix".equals(command)) {
451
} else if ("3DGridDivision".equals(command)) {
453
logger.warn("Unkown PMP Bond command: " + command);
456
logger.warn("Unkown PMP object: " + object);
460
/*
 * Prepares the reader state for a newly announced PMP object.
 * "Atom" and "Bond" replace chemObject with a fresh instance from the
 * builder, "Model" replaces modelStructure with a fresh atom container,
 * and any other type name is reported as an error.
 *
 * builder - factory used to create the new object
 * object  - type name taken from the object header line
 */
private void constructObject(IChemObjectBuilder builder, String object) {
461
if ("Atom".equals(object)) {
462
// created with symbol "C"; a later "ACL" command sets the real symbol and atomic number
chemObject = builder.newAtom("C");
463
} else if ("Bond".equals(object)) {
// NOTE(review): the embedded numbering skips a line (464) at this point, so a
// statement of the Bond branch may be missing from this view.
465
chemObject = builder.newBond();
466
} else if ("Model".equals(object)) {
467
modelStructure = builder.newAtomContainer();
469
logger.error("Cannot construct PMP object type: " + object);
473
public void close() throws IOException {