1
/* $Revision: 8242 $ $Author: egonw $ $Date: 2007-04-22 22:49:31 +0200 (Sun, 22 Apr 2007) $
3
* Copyright (C) 1997-2007 The Chemistry Development Kit (CDK) project
5
* Contact: cdk-devel@lists.sourceforge.net
7
* This program is free software; you can redistribute it and/or
8
* modify it under the terms of the GNU Lesser General Public License
9
* as published by the Free Software Foundation; either version 2.1
10
* of the License, or (at your option) any later version.
11
* All we ask is that proper credit is given for our work, which includes
12
* - but is not limited to - adding the above copyright notice to the beginning
13
* of your source code files, and to any copyright notice that you may distribute
14
* with programs based on this work.
16
* This program is distributed in the hope that it will be useful,
17
* but WITHOUT ANY WARRANTY; without even the implied warranty of
18
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
* GNU Lesser General Public License for more details.
21
* You should have received a copy of the GNU Lesser General Public License
22
* along with this program; if not, write to the Free Software
23
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
25
package org.openscience.cdk.io;
27
import java.io.BufferedReader;
28
import java.io.IOException;
29
import java.io.InputStream;
30
import java.io.InputStreamReader;
31
import java.io.Reader;
32
import java.io.StringReader;
33
import java.util.Hashtable;
36
import javax.vecmath.Point3d;
38
import org.openscience.cdk.CDKConstants;
39
import org.openscience.cdk.config.AtomTypeFactory;
40
import org.openscience.cdk.exception.CDKException;
41
import org.openscience.cdk.exception.NoSuchAtomTypeException;
42
import org.openscience.cdk.graph.rebond.RebondTool;
43
import org.openscience.cdk.interfaces.IAtom;
44
import org.openscience.cdk.interfaces.IAtomType;
45
import org.openscience.cdk.interfaces.IBioPolymer;
46
import org.openscience.cdk.interfaces.IChemFile;
47
import org.openscience.cdk.interfaces.IChemModel;
48
import org.openscience.cdk.interfaces.IChemObject;
49
import org.openscience.cdk.interfaces.IChemSequence;
50
import org.openscience.cdk.interfaces.IMolecule;
51
import org.openscience.cdk.interfaces.IMoleculeSet;
52
import org.openscience.cdk.interfaces.IMonomer;
53
import org.openscience.cdk.interfaces.IStrand;
54
import org.openscience.cdk.io.formats.IResourceFormat;
55
import org.openscience.cdk.io.formats.PDBFormat;
56
import org.openscience.cdk.io.setting.BooleanIOSetting;
57
import org.openscience.cdk.io.setting.IOSetting;
58
import org.openscience.cdk.protein.data.PDBAtom;
59
import org.openscience.cdk.protein.data.PDBMonomer;
60
import org.openscience.cdk.protein.data.PDBPolymer;
61
import org.openscience.cdk.protein.data.PDBStrand;
62
import org.openscience.cdk.protein.data.PDBStructure;
63
import org.openscience.cdk.tools.LoggingTool;
64
import org.openscience.cdk.tools.manipulator.AtomTypeManipulator;
67
* Reads the contents of a PDBFile.
69
* <p>A description can be found at <a href="http://www.rcsb.org/pdb/static.do?p=file_formats/pdb/index.html">
70
* http://www.rcsb.org/pdb/static.do?p=file_formats/pdb/index.html</a>.
74
* @author Edgar Luttmann
75
* @author Bradley Smith <bradley@baysmith.com>
76
* @author Martin Eklund <martin.eklund@farmbio.uu.se>
77
* @author Ola Spjuth <ola.spjuth@farmbio.uu.se>
78
* @cdk.created 2001-08-06
79
* @cdk.keyword file format, PDB
81
public class PDBReader extends DefaultChemObjectReader {
83
private LoggingTool logger;
84
private BufferedReader _oInput; // The internal used BufferedReader
85
private BooleanIOSetting useRebondTool;
86
private BooleanIOSetting readConnect;
88
private Map atomNumberMap;
90
private static AtomTypeFactory pdbFactory;
94
* Contructs a new PDBReader that can read Molecules from a given
97
* @param oIn The InputStream to read from
100
public PDBReader(InputStream oIn) {
101
this(new InputStreamReader(oIn));
106
* Contructs a new PDBReader that can read Molecules from a given
109
* @param oIn The Reader to read from
112
public PDBReader(Reader oIn) {
113
logger = new LoggingTool(this.getClass());
114
_oInput = new BufferedReader(oIn);
120
this(new StringReader(""));
123
public IResourceFormat getFormat() {
124
return PDBFormat.getInstance();
127
public void setReader(Reader input) throws CDKException {
128
if (input instanceof BufferedReader) {
129
this._oInput = (BufferedReader)input;
131
this._oInput = new BufferedReader(input);
135
public void setReader(InputStream input) throws CDKException {
136
setReader(new InputStreamReader(input));
139
public boolean accepts(Class classObject) {
140
Class[] interfaces = classObject.getInterfaces();
141
for (int i=0; i<interfaces.length; i++) {
142
if (IChemFile.class.equals(interfaces[i])) return true;
149
* Takes an object which subclasses IChemObject, e.g. Molecule, and will
150
* read this (from file, database, internet etc). If the specific
151
* implementation does not support a specific IChemObject it will throw
154
* @param oObj The object that subclasses IChemObject
155
* @return The IChemObject read
156
* @exception CDKException
159
public IChemObject read(IChemObject oObj) throws CDKException {
160
if (oObj instanceof IChemFile) {
161
if (pdbFactory == null) {
163
pdbFactory = AtomTypeFactory.getInstance("org/openscience/cdk/config/data/pdb_atomtypes.xml",
164
((IChemFile)oObj).getBuilder());
165
} catch (Exception exception) {
166
logger.debug(exception);
167
throw new CDKException("Could not setup list of PDB atom types! " + exception.getMessage());
170
return readChemFile((IChemFile)oObj);
172
throw new CDKException("Only supported is reading of ChemFile objects.");
177
* Read a <code>ChemFile</code> from a file in PDB format. The molecules
178
* in the file are stored as <code>BioPolymer</code>s in the
179
* <code>ChemFile</code>. The residues are the monomers of the
180
* <code>BioPolymer</code>, and their names are the concatenation of the
181
* residue, chain id, and the sequence number. Separate chains (denoted by
182
* TER records) are stored as separate <code>BioPolymer</code> molecules.
184
* <p>Connectivity information is not currently read.
186
* @return The ChemFile that was read from the PDB file.
188
private IChemFile readChemFile(IChemFile oFile) {
189
// initialize all containers
190
IChemSequence oSeq = oFile.getBuilder().newChemSequence();
191
IChemModel oModel = oFile.getBuilder().newChemModel();
192
IMoleculeSet oSet = oFile.getBuilder().newMoleculeSet();
194
// some variables needed
197
PDBPolymer oBP = new PDBPolymer();
198
IMolecule molecularStructure = oFile.getBuilder().newMolecule();
199
StringBuffer cResidue;
203
char chain = 'A'; // To ensure stringent name giving of monomers
207
boolean isProteinStructure = false;
209
atomNumberMap = new Hashtable();
211
// do the reading of the Input
214
cRead = _oInput.readLine();
215
logger.debug("Read line: ", cRead);
217
lineLength = cRead.length();
219
if (lineLength < 80) {
220
logger.warn("Line is not of the expected length 80!");
223
// make sure the record name is 6 characters long
224
if (lineLength < 6) {
227
// check the first column to decide what to do
228
cCol = cRead.substring(0,6);
229
if ("SEQRES".equalsIgnoreCase(cCol)) {
230
isProteinStructure = true;
231
} else if ("ATOM ".equalsIgnoreCase(cCol)) {
232
// read an atom record
233
oAtom = readAtom(cRead, lineLength);
235
if (isProteinStructure) {
236
// construct a string describing the residue
237
cResidue = new StringBuffer(8);
238
oObj = oAtom.getResName();
240
cResidue = cResidue.append(oObj.trim());
242
oObj = oAtom.getChainID();
244
// cResidue = cResidue.append(((String)oObj).trim());
245
cResidue = cResidue.append(String.valueOf(chain));
247
oObj = oAtom.getResSeq();
249
cResidue = cResidue.append(oObj.trim());
252
// search for an existing strand or create a new one.
253
String strandName = oAtom.getChainID();
254
if (strandName == null || strandName.length() == 0) {
255
strandName = String.valueOf(chain);
257
oStrand = oBP.getStrand(strandName);
258
if (oStrand == null) {
259
oStrand = new PDBStrand();
260
oStrand.setStrandName(strandName);
261
oStrand.setID(String.valueOf(chain));
264
// search for an existing monomer or create a new one.
265
oMonomer = oBP.getMonomer(cResidue.toString(), String.valueOf(chain));
266
if (oMonomer == null) {
267
PDBMonomer monomer = new PDBMonomer();
268
monomer.setMonomerName(cResidue.toString());
269
monomer.setMonomerType(oAtom.getResName());
270
monomer.setChainID(oAtom.getChainID());
271
monomer.setICode(oAtom.getICode());
272
monomer.setResSeq(oAtom.getResSeq());
277
oBP.addAtom(oAtom, oMonomer, oStrand);
278
if (readConnect.isSet() && atomNumberMap.put(new Integer(oAtom.getSerial()), oAtom) != null) {
279
logger.warn("Duplicate serial ID found for atom: ", oAtom);
282
molecularStructure.addAtom(oAtom);
284
logger.debug("Added ATOM: ", oAtom);
286
/** As HETATMs cannot be considered to either belong to a certain monomer or strand,
287
* they are dealt with seperately.*/
288
} else if("HETATM".equalsIgnoreCase(cCol)) {
289
// read an atom record
290
oAtom = readAtom(cRead, lineLength);
291
oAtom.setHetAtom(true);
293
if (atomNumberMap.put(new Integer(oAtom.getSerial()), oAtom) != null) {
294
logger.warn("Duplicate serial ID found for atom: ", oAtom);
296
logger.debug("Added HETATM: ", oAtom);
297
} else if ("TER ".equalsIgnoreCase(cCol)) {
300
oStrand = new PDBStrand();
301
oStrand.setStrandName(String.valueOf(chain));
302
logger.debug("Added new STRAND");
303
} else if ("END ".equalsIgnoreCase(cCol)) {
304
atomNumberMap.clear();
305
if (isProteinStructure) {
306
// create bonds and finish the molecule
307
if (useRebondTool.isSet()) {
309
if(!createBondsWithRebondTool(oBP)) {
310
// Get rid of all potentially created bonds.
311
logger.info("Bonds could not be created using the RebondTool when PDB file was read.");
312
oBP.removeAllBonds();
314
} catch (Exception exception) {
315
logger.info("Bonds could not be created when PDB file was read.");
316
logger.debug(exception);
319
oSet.addMolecule(oBP);
321
oSet.addMolecule(molecularStructure);
323
} else if (cCol.equals("MODEL ")) {
324
// OK, start a new model and save the current one first *if* it contains atoms
325
if (isProteinStructure) {
326
if (oBP.getAtomCount() > 0) {
328
oSet.addAtomContainer(oBP);
329
oModel.setMoleculeSet(oSet);
330
oSeq.addChemModel(oModel);
332
oBP = new PDBPolymer();
333
oModel = oFile.getBuilder().newChemModel();
334
oSet = oFile.getBuilder().newMoleculeSet();
337
if (molecularStructure.getAtomCount() > 0) {
339
oSet.addAtomContainer(molecularStructure);
340
oModel.setMoleculeSet(oSet);
341
oSeq.addChemModel(oModel);
343
molecularStructure = oFile.getBuilder().newMolecule();
344
oModel = oFile.getBuilder().newChemModel();
345
oSet = oFile.getBuilder().newMoleculeSet();
348
} else if ("REMARK".equalsIgnoreCase(cCol)) {
349
Object comment = oFile.getProperty(CDKConstants.COMMENT);
350
if (comment == null) {
353
if (lineLength >12) {
354
comment = comment.toString() + cRead.substring(11).trim() + "\n";
355
oFile.setProperty(CDKConstants.COMMENT, comment);
357
logger.warn("REMARK line found without any comment!");
359
} else if ("COMPND".equalsIgnoreCase(cCol)) {
360
String title = cRead.substring(10).trim();
361
oFile.setProperty(CDKConstants.TITLE, title);
364
/*************************************************************
365
* Read connectivity information from CONECT records.
366
* Only covalent bonds are dealt with. Perhaps salt bridges
367
* should be dealt with in the same way..?
369
else if (readConnect.isSet() && "CONECT".equalsIgnoreCase(cCol)) {
371
if (cRead.length() < 16) {
372
logger.debug("Skipping unexpected empty CONECT line! : ", cRead);
375
String bondAtom = cRead.substring(7, 11).trim();
376
int bondAtomNo = Integer.parseInt(bondAtom);
377
String bondedAtom = cRead.substring(12, 16).trim();
378
int bondedAtomNo = -1;
380
try {bondedAtomNo = Integer.parseInt(bondedAtom);}
381
catch(Exception e) {bondedAtomNo = -1;}
383
if(bondedAtomNo != -1) {
384
addBond(oBP, bondAtomNo, bondedAtomNo);
385
logger.warn("Bonded " + bondAtomNo + " with " + bondedAtomNo);
388
if(cRead.length() > 17) {
389
bondedAtom = cRead.substring(17, 21);
390
bondedAtom = bondedAtom.trim();
391
try {bondedAtomNo = Integer.parseInt(bondedAtom);}
392
catch(Exception e) {bondedAtomNo = -1;}
394
if(bondedAtomNo != -1) {
395
addBond(oBP, bondAtomNo, bondedAtomNo);
396
logger.warn("Bonded " + bondAtomNo + " with " + bondedAtomNo);
400
if(cRead.length() > 22) {
401
bondedAtom = cRead.substring(22, 26);
402
bondedAtom = bondedAtom.trim();
403
try {bondedAtomNo = Integer.parseInt(bondedAtom);}
404
catch(Exception e) {bondedAtomNo = -1;}
406
if(bondedAtomNo != -1) {
407
addBond(oBP, bondAtomNo, bondedAtomNo);
408
logger.warn("Bonded " + bondAtomNo + " with " + bondedAtomNo);
412
if(cRead.length() > 27) {
413
bondedAtom = cRead.substring(27, 31);
414
bondedAtom = bondedAtom.trim();
415
try {bondedAtomNo = Integer.parseInt(bondedAtom);}
416
catch(Exception e) {bondedAtomNo = -1;}
418
if(bondedAtomNo != -1) {
419
addBond(oBP, bondAtomNo, bondedAtomNo);
420
logger.warn("Bonded " + bondAtomNo + " with " + bondedAtomNo);
425
/*************************************************************/
427
else if ("HELIX ".equalsIgnoreCase(cCol)) {
428
// HELIX 1 H1A CYS A 11 LYS A 18 1 RESIDUE 18 HAS POSITIVE PHI 1D66 72
430
// 01234567890123456789012345678901234567890123456789012345678901234567890123456789
431
PDBStructure structure = new PDBStructure();
432
structure.setStructureType(PDBStructure.HELIX);
433
structure.setStartChainID(cRead.charAt(19));
434
structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(21, 25).trim()));
435
structure.setStartInsertionCode(cRead.charAt(25));
436
structure.setEndChainID(cRead.charAt(31));
437
structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(33, 37).trim()));
438
structure.setEndInsertionCode(cRead.charAt(37));
439
oBP.addStructure(structure);
440
} else if ("SHEET ".equalsIgnoreCase(cCol)) {
441
PDBStructure structure = new PDBStructure();
442
structure.setStructureType(PDBStructure.SHEET);
443
structure.setStartChainID(cRead.charAt(21));
444
structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(22, 26).trim()));
445
structure.setStartInsertionCode(cRead.charAt(26));
446
structure.setEndChainID(cRead.charAt(32));
447
structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(33, 37).trim()));
448
structure.setEndInsertionCode(cRead.charAt(37));
449
oBP.addStructure(structure);
450
} else if ("TURN ".equalsIgnoreCase(cCol)) {
451
PDBStructure structure = new PDBStructure();
452
structure.setStructureType(PDBStructure.TURN);
453
structure.setStartChainID(cRead.charAt(19));
454
structure.setStartSequenceNumber(Integer.parseInt(cRead.substring(20, 24).trim()));
455
structure.setStartInsertionCode(cRead.charAt(24));
456
structure.setEndChainID(cRead.charAt(30));
457
structure.setEndSequenceNumber(Integer.parseInt(cRead.substring(31, 35).trim()));
458
structure.setEndInsertionCode(cRead.charAt(35));
459
oBP.addStructure(structure);
460
} // ignore all other commands
462
} while (_oInput.ready() && (cRead != null));
463
} catch (Exception e) {
464
logger.error("Found a problem at line:\n");
466
logger.error("01234567890123456789012345678901234567890123456789012345678901234567890123456789");
467
logger.error(" 1 2 3 4 5 6 7 ");
468
logger.error(" error: " + e.getMessage());
472
// try to close the Input
475
} catch (Exception e) {
479
// Set all the dependencies
480
oModel.setMoleculeSet(oSet);
481
oSeq.addChemModel(oModel);
482
oFile.addChemSequence(oSeq);
487
private void addBond(PDBPolymer obp, int bondAtomNo, int bondedAtomNo) {
488
IAtom firstAtom = (PDBAtom)atomNumberMap.get(new Integer(bondAtomNo));
489
IAtom secondAtom = (PDBAtom)atomNumberMap.get(new Integer(bondedAtomNo));
490
if (firstAtom == null) {
491
logger.error("Could not find bond start atom in map with serial id: ", bondAtomNo);
493
if (secondAtom == null) {
494
logger.error("Could not find bond target atom in map with serial id: ", bondAtomNo);
496
obp.addBond(firstAtom.getBuilder().newBond(firstAtom, secondAtom, 1));
499
private boolean createBondsWithRebondTool(IBioPolymer pol){
500
RebondTool tool = new RebondTool(2.0, 0.5, 0.5);
503
AtomTypeFactory factory = AtomTypeFactory.getInstance("org/openscience/cdk/config/data/jmol_atomtypes.txt",
505
java.util.Iterator atoms = pol.atoms();
506
while (atoms.hasNext()) {
507
IAtom atom = (IAtom)atoms.next();
509
IAtomType[] types = factory.getAtomTypes(atom.getSymbol());
510
if (types.length > 0) {
511
// just pick the first one
512
AtomTypeManipulator.configure(atom, types[0]);
514
logger.warn("Could not configure atom with symbol: "+ atom.getSymbol());
516
} catch (Exception e) {
517
logger.warn("Could not configure atom (but don't care): " + e.getMessage());
522
} catch (Exception e) {
523
logger.error("Could not rebond the polymer: " + e.getMessage());
530
* Creates an <code>Atom</code> and sets properties to their values from
531
* the ATOM record. If the line is shorter than 80 characters, the information
532
* past 59 characters is treated as optional. If the line is shorter than 59
533
* characters, a <code>RuntimeException</code> is thrown.
535
* @param cLine the PDB ATOM record.
536
* @return the <code>Atom</code> created from the record.
537
* @throws RuntimeException if the line is too short (less than 59 characters).
539
private PDBAtom readAtom(String cLine, int lineLength) {
540
// a line looks like:
541
// 01234567890123456789012345678901234567890123456789012345678901234567890123456789
542
// ATOM 1 O5* C A 1 20.662 36.632 23.475 1.00 10.00 114D 45
543
// ATOM 1186 1H ALA 1 10.105 5.945 -6.630 1.00 0.00 1ALE1288
545
if (lineLength < 59) {
546
throw new RuntimeException("PDBReader error during readAtom(): line too short");
548
String elementSymbol = cLine.substring(12, 14).trim();
550
if (elementSymbol.length() == 2) {
551
// ensure that the second char is lower case
552
elementSymbol = elementSymbol.charAt(0) + elementSymbol.substring(1).toLowerCase();
554
String rawAtomName = cLine.substring(12, 16).trim();
555
String resName = cLine.substring(17, 20).trim();
557
IAtomType type = pdbFactory.getAtomType(resName+"."+rawAtomName);
558
elementSymbol = type.getSymbol();
559
} catch (NoSuchAtomTypeException e) {
560
logger.error("Did not recognize PDB atom type: " + resName+"."+rawAtomName);
562
PDBAtom oAtom = new PDBAtom(elementSymbol,
563
new Point3d(Double.parseDouble(cLine.substring(30, 38)),
564
Double.parseDouble(cLine.substring(38, 46)),
565
Double.parseDouble(cLine.substring(46, 54))
569
oAtom.setRecord(cLine);
570
oAtom.setSerial(Integer.parseInt(cLine.substring(6, 11).trim()));
571
oAtom.setName(rawAtomName.trim());
572
oAtom.setAltLoc(cLine.substring(16, 17).trim());
573
oAtom.setResName(resName);
574
oAtom.setChainID(cLine.substring(21, 22).trim());
575
oAtom.setResSeq(cLine.substring(22, 26).trim());
576
oAtom.setICode(cLine.substring(26, 27).trim());
577
oAtom.setAtomTypeName(oAtom.getResName()+"."+rawAtomName);
578
if (lineLength >= 59) {
579
String frag = cLine.substring(54, 60).trim();
580
if (frag.length() > 0) {
581
oAtom.setOccupancy(Double.parseDouble(frag));
584
if (lineLength >= 65) {
585
String frag = cLine.substring(60, 66).trim();
586
if (frag.length() > 0) {
587
oAtom.setTempFactor(Double.parseDouble(frag));
590
if (lineLength >= 75) {
591
oAtom.setSegID(cLine.substring(72, 76).trim());
593
// if (lineLength >= 78) {
594
// oAtom.setSymbol((new String(cLine.substring(76, 78))).trim());
596
if (lineLength >= 79) {
597
String frag = cLine.substring(78, 80).trim();
598
if (frag.length() > 0) {
599
oAtom.setCharge(Double.parseDouble(frag));
603
/*************************************************************************************
604
* It sets a flag in the property content of an atom,
605
* which is used when bonds are created to check if the atom is an OXT-record => needs
608
String oxt = cLine.substring(13, 16).trim();
610
if(oxt.equals("OXT")) {
616
/*************************************************************************************/
621
public void close() throws IOException {
625
private void initIOSettings() {
626
useRebondTool = new BooleanIOSetting("UseRebondTool", IOSetting.LOW,
627
"Should the PDBReader deduce bonding patterns?",
629
readConnect = new BooleanIOSetting("ReadConnectSection", IOSetting.LOW,
630
"Should the CONECT be read?",
634
public void customizeJob() {
635
fireIOSettingQuestion(useRebondTool);
636
fireIOSettingQuestion(readConnect);
639
public IOSetting[] getIOSettings() {
640
IOSetting[] settings = new IOSetting[2];
641
settings[0] = useRebondTool;
642
settings[1] = readConnect;