2
"""Unit tests for the pdb parser.
4
from cogent.util.unit_test import TestCase, main
5
from cogent.parse.pdb import dict2pdb, dict2ter, pdb2dict, get_symmetry, \
6
get_coords_offset, get_trailer_offset, \
7
parse_header, parse_coords, parse_trailer, \
9
from cogent.core.entity import Structure
10
from cogent.core.entity import StructureBuilder
11
from numpy import array, allclose
13
__author__ = "Marcin Knight"
14
__copyright__ = "Copyright 2007-2009, The Cogent Project"
15
__credits__ = ["Marcin Cieslik"]
18
__maintainer__ = "Marcin Cieslik"
19
__email__ = "mpc4p@virginia.edu"
20
__status__ = "Production"
23
class pdbTests(TestCase):
24
"""Tests of cogent.parse.pdb functions."""
26
def test_PDBParser(self):
    """Tests the UI parsing function.

    Parses 'data/2E12.pdb' under the id 'JUNK' and checks the type and
    size of the returned entity, selected header entries, the raw
    header/trailer, the parsed trailer and the entity id.
    """
    # The path is relative, so it is resolved against the current
    # working directory of the test run.
    fh = open('data/2E12.pdb')
    try:
        structure = PDBParser(fh, 'JUNK')
    finally:
        # Close the handle whether or not parsing succeeds.
        fh.close()

    # Top-level entity: exactly one child, reachable under the key (0,).
    assert type(structure) is Structure
    assert len(structure) == 1
    assert (0,) in structure

    # Selected values extracted into the parsed header.
    assert structure.header['space_group'] == 'P 21 21 21'
    assert structure.header['experiment_type'] == 'X-RAY DIFFRACTION'
    assert structure.header['r_free'] == '0.280'
    assert structure.header['dbref_acc'] == 'Q8P4R5'
    assert structure.header['cryst1'] == '49.942 51.699 82.120 90.00 90.00 90.00'
    assert structure.header['matthews'] == '2.29'

    # The child stored under (0,) itself holds two children.
    model = structure[(0,)]
    assert len(model) == 2

    # Raw sections and the parsed header must be non-empty; the parsed
    # trailer is expected to be an empty dict.
    assert structure.raw_header
    assert structure.raw_trailer
    assert structure.header
    assert structure.trailer == {}
    assert structure.getId() == ('JUNK', )
48
def test_parse_trailer(self):
    """Check that the dummy trailer parser returns a dict."""
    trailer = parse_trailer(None)
    assert isinstance(trailer, dict)
53
def test_parse_coords(self):
    """Testing minimal structure building and coords parsing.

    Feeds one ATOM and one HETATM record (preceded by a MODEL line) to
    parse_coords and checks the shape of the resulting hierarchy and
    all three coordinates of both atoms.
    """
    builder = StructureBuilder()
    builder.initStructure('JUNK')
    # NOTE(review): PDB records are normally column-aligned; confirm the
    # spacing inside these fixture strings is what parse_coords expects.
    atom = 'ATOM 10 CA PRO A 2 51.588 38.262 31.417 1.00 6.58 C \n'
    hetatm = 'HETATM 1633 O HOH B 164 17.979 35.529 38.171 1.00 1.02 O \n'
    lines = ['MODEL ', atom, hetatm]
    z = parse_coords(builder, lines)

    # Two children under (0,), each holding a single child.
    assert len(z[(0,)]) == 2
    assert len(z[(0,)][('A',)]) == 1
    assert len(z[(0,)][('B',)]) == 1

    # Look both atoms up by their full ids in the 'A' table.
    atom1 = z.table['A'][('JUNK', 0, 'A', ('PRO', 2, ' '), ('CA', ' '))]
    hetatm1 = z.table['A'][('JUNK', 0, 'B', ('H_HOH', 164, ' '), ('O', ' '))]

    # Compare every coordinate, not only the last one. Indexing (rather
    # than zip) keeps the failure loud if fewer than three values exist.
    expected_by_atom = (
        ([51.588, 38.262, 31.417], list(atom1.coords)),
        ([17.979, 35.529, 38.171], list(hetatm1.coords)),
    )
    for expected, observed in expected_by_atom:
        for axis in range(3):
            self.assertAlmostEqual(expected[axis], observed[axis])
73
def test_parse_header(self):
74
"""testing header parsing.
76
header = ['HEADER TRANSLATION 17-OCT-06 2E12 \n',
77
'TITLE THE CRYSTAL STRUCTURE OF XC5848 FROM XANTHOMONAS CAMPESTRIS \n',
78
'TITLE 2 ADOPTING A NOVEL VARIANT OF SM-LIKE MOTIF \n',
79
'COMPND MOL_ID: 1; \n',
80
'COMPND 2 MOLECULE: HYPOTHETICAL PROTEIN XCC3642; \n',
81
'COMPND 3 CHAIN: A, B; \n',
82
'COMPND 4 SYNONYM: SM-LIKE MOTIF; \n',
83
'COMPND 5 ENGINEERED: YES \n',
84
'SOURCE MOL_ID: 1; \n',
85
'SOURCE 2 ORGANISM_SCIENTIFIC: XANTHOMONAS CAMPESTRIS PV. CAMPESTRIS; \n',
86
'SOURCE 3 ORGANISM_TAXID: 340; \n',
87
'SOURCE 4 STRAIN: PV. CAMPESTRIS; \n',
88
'SOURCE 5 EXPRESSION_SYSTEM: ESCHERICHIA COLI; \n',
89
'SOURCE 6 EXPRESSION_SYSTEM_TAXID: 562 \n',
90
'KEYWDS NOVEL SM-LIKE MOTIF, LSM MOTIF, XANTHOMONAS CAMPESTRIS, X- \n',
91
'KEYWDS 2 RAY CRYSTALLOGRAPHY, TRANSLATION \n',
92
'EXPDTA X-RAY DIFFRACTION \n',
93
'AUTHOR K.-H.CHIN,S.-K.RUAN,A.H.-J.WANG,S.-H.CHOU \n',
94
'REVDAT 2 24-FEB-09 2E12 1 VERSN \n',
95
'REVDAT 1 30-OCT-07 2E12 0 \n',
96
'JRNL AUTH K.-H.CHIN,S.-K.RUAN,A.H.-J.WANG,S.-H.CHOU \n',
97
'JRNL TITL XC5848, AN ORFAN PROTEIN FROM XANTHOMONAS \n',
98
'JRNL TITL 2 CAMPESTRIS, ADOPTS A NOVEL VARIANT OF SM-LIKE MOTIF \n',
99
'JRNL REF PROTEINS V. 68 1006 2007 \n',
100
'JRNL REFN ISSN 0887-3585 \n',
101
'JRNL PMID 17546661 \n',
102
'JRNL DOI 10.1002/PROT.21375 \n',
105
'REMARK 2 RESOLUTION. 1.70 ANGSTROMS. \n',
107
'REMARK 3 REFINEMENT. \n',
108
'REMARK 3 PROGRAM : CNS \n',
109
'REMARK 3 AUTHORS : BRUNGER,ADAMS,CLORE,DELANO,GROS,GROSSE- \n',
110
'REMARK 3 : KUNSTLEVE,JIANG,KUSZEWSKI,NILGES, PANNU, \n',
111
'REMARK 3 : READ,RICE,SIMONSON,WARREN \n',
113
'REMARK 3 REFINEMENT TARGET : NULL \n',
115
'REMARK 3 DATA USED IN REFINEMENT. \n',
116
'REMARK 3 RESOLUTION RANGE HIGH (ANGSTROMS) : 1.70 \n',
117
'REMARK 3 RESOLUTION RANGE LOW (ANGSTROMS) : 30.00 \n',
118
'REMARK 3 DATA CUTOFF (SIGMA(F)) : 5.000 \n',
119
'REMARK 3 DATA CUTOFF HIGH (ABS(F)) : NULL \n',
120
'REMARK 3 DATA CUTOFF LOW (ABS(F)) : NULL \n',
121
'REMARK 3 COMPLETENESS (WORKING+TEST) (%) : 99.1 \n',
122
'REMARK 3 NUMBER OF REFLECTIONS : 6937 \n',
124
'REMARK 3 FIT TO DATA USED IN REFINEMENT. \n',
125
'REMARK 3 CROSS-VALIDATION METHOD : THROUGHOUT \n',
126
'REMARK 3 FREE R VALUE TEST SET SELECTION : RANDOM \n',
127
'REMARK 3 R VALUE (WORKING SET) : 0.220 \n',
128
'REMARK 3 FREE R VALUE : 0.280 \n',
129
'REMARK 3 FREE R VALUE TEST SET SIZE (%) : NULL \n',
130
'REMARK 3 FREE R VALUE TEST SET COUNT : NULL \n',
131
'REMARK 3 ESTIMATED ERROR OF FREE R VALUE : NULL \n',
133
'REMARK 3 FIT IN THE HIGHEST RESOLUTION BIN. \n',
134
'REMARK 3 TOTAL NUMBER OF BINS USED : NULL \n',
135
'REMARK 3 BIN RESOLUTION RANGE HIGH (A) : 1.70 \n',
136
'REMARK 3 BIN RESOLUTION RANGE LOW (A) : 1.75 \n',
137
'REMARK 3 BIN COMPLETENESS (WORKING+TEST) (%) : 97.00 \n',
138
'REMARK 3 REFLECTIONS IN BIN (WORKING SET) : NULL \n',
139
'REMARK 3 BIN R VALUE (WORKING SET) : 0.2400 \n',
140
'REMARK 3 BIN FREE R VALUE : 0.2200 \n',
141
'REMARK 3 BIN FREE R VALUE TEST SET SIZE (%) : NULL \n',
142
'REMARK 3 BIN FREE R VALUE TEST SET COUNT : NULL \n',
143
'REMARK 3 ESTIMATED ERROR OF BIN FREE R VALUE : 0.012 \n',
145
'REMARK 3 NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT. \n',
146
'REMARK 3 PROTEIN ATOMS : 1512 \n',
147
'REMARK 3 NUCLEIC ACID ATOMS : 0 \n',
148
'REMARK 3 HETEROGEN ATOMS : 0 \n',
149
'REMARK 3 SOLVENT ATOMS : 122 \n',
151
'REMARK 3 B VALUES. \n',
152
'REMARK 3 FROM WILSON PLOT (A**2) : 24.00 \n',
153
'REMARK 3 MEAN B VALUE (OVERALL, A**2) : NULL \n',
154
'REMARK 3 OVERALL ANISOTROPIC B VALUE. \n',
155
'REMARK 3 B11 (A**2) : NULL \n',
156
'REMARK 3 B22 (A**2) : NULL \n',
157
'REMARK 3 B33 (A**2) : NULL \n',
158
'REMARK 3 B12 (A**2) : NULL \n',
159
'REMARK 3 B13 (A**2) : NULL \n',
160
'REMARK 3 B23 (A**2) : NULL \n',
162
'REMARK 3 ESTIMATED COORDINATE ERROR. \n',
163
'REMARK 3 ESD FROM LUZZATI PLOT (A) : NULL \n',
164
'REMARK 3 ESD FROM SIGMAA (A) : NULL \n',
165
'REMARK 3 LOW RESOLUTION CUTOFF (A) : NULL \n',
167
'REMARK 3 CROSS-VALIDATED ESTIMATED COORDINATE ERROR. \n',
168
'REMARK 3 ESD FROM C-V LUZZATI PLOT (A) : NULL \n',
169
'REMARK 3 ESD FROM C-V SIGMAA (A) : NULL \n',
171
'REMARK 3 RMS DEVIATIONS FROM IDEAL VALUES. \n',
172
'REMARK 3 BOND LENGTHS (A) : 0.007 \n',
173
'REMARK 3 BOND ANGLES (DEGREES) : 1.32 \n',
174
'REMARK 3 DIHEDRAL ANGLES (DEGREES) : NULL \n',
175
'REMARK 3 IMPROPER ANGLES (DEGREES) : NULL \n',
177
'REMARK 3 ISOTROPIC THERMAL MODEL : NULL \n',
179
'REMARK 3 ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA \n',
180
'REMARK 3 MAIN-CHAIN BOND (A**2) : NULL ; NULL \n',
181
'REMARK 3 MAIN-CHAIN ANGLE (A**2) : NULL ; NULL \n',
182
'REMARK 3 SIDE-CHAIN BOND (A**2) : NULL ; NULL \n',
183
'REMARK 3 SIDE-CHAIN ANGLE (A**2) : NULL ; NULL \n',
185
'REMARK 3 BULK SOLVENT MODELING. \n',
186
'REMARK 3 METHOD USED : NULL \n',
187
'REMARK 3 KSOL : NULL \n',
188
'REMARK 3 BSOL : NULL \n',
190
'REMARK 3 NCS MODEL : NULL \n',
192
'REMARK 3 NCS RESTRAINTS. RMS SIGMA/WEIGHT \n',
193
'REMARK 3 GROUP 1 POSITIONAL (A) : NULL ; NULL \n',
194
'REMARK 3 GROUP 1 B-FACTOR (A**2) : NULL ; NULL \n',
196
'REMARK 3 PARAMETER FILE 1 : NULL \n',
197
'REMARK 3 TOPOLOGY FILE 1 : NULL \n',
199
'REMARK 3 OTHER REFINEMENT REMARKS: NULL \n',
201
'REMARK 4 2E12 COMPLIES WITH FORMAT V. 3.15, 01-DEC-08 \n',
203
'REMARK 100 THIS ENTRY HAS BEEN PROCESSED BY PDBJ ON 19-OCT-06. \n',
204
'REMARK 100 THE RCSB ID CODE IS RCSB026092. \n',
206
'REMARK 200 EXPERIMENTAL DETAILS \n',
207
'REMARK 200 EXPERIMENT TYPE : X-RAY DIFFRACTION \n',
208
'REMARK 200 DATE OF DATA COLLECTION : 28-JUL-06 \n',
209
'REMARK 200 TEMPERATURE (KELVIN) : 100 \n',
210
'REMARK 200 PH : 8.0 \n',
211
'REMARK 200 NUMBER OF CRYSTALS USED : 10 \n',
213
'REMARK 200 SYNCHROTRON (Y/N) : Y \n',
214
'REMARK 200 RADIATION SOURCE : NSRRC \n',
215
'REMARK 200 BEAMLINE : BL13B1 \n',
216
'REMARK 200 X-RAY GENERATOR MODEL : NULL \n',
217
'REMARK 200 MONOCHROMATIC OR LAUE (M/L) : M \n',
218
'REMARK 200 WAVELENGTH OR RANGE (A) : 0.96437, 0.97983 \n',
219
'REMARK 200 MONOCHROMATOR : NULL \n',
220
'REMARK 200 OPTICS : NULL \n',
222
'REMARK 200 DETECTOR TYPE : CCD \n',
223
'REMARK 200 DETECTOR MANUFACTURER : ADSC QUANTUM 315 \n',
224
'REMARK 200 INTENSITY-INTEGRATION SOFTWARE : DENZO \n',
225
'REMARK 200 DATA SCALING SOFTWARE : HKL-2000 \n',
227
'REMARK 200 NUMBER OF UNIQUE REFLECTIONS : 6937 \n',
228
'REMARK 200 RESOLUTION RANGE HIGH (A) : 1.700 \n',
229
'REMARK 200 RESOLUTION RANGE LOW (A) : 30.000 \n',
230
'REMARK 200 REJECTION CRITERIA (SIGMA(I)) : 2.000 \n',
232
'REMARK 200 OVERALL. \n',
233
'REMARK 200 COMPLETENESS FOR RANGE (%) : 99.7 \n',
234
'REMARK 200 DATA REDUNDANCY : 4.500 \n',
235
'REMARK 200 R MERGE (I) : 0.24000 \n',
236
'REMARK 200 R SYM (I) : 0.06000 \n',
237
'REMARK 200 <I/SIGMA(I)> FOR THE DATA SET : 8.0000 \n',
239
'REMARK 200 IN THE HIGHEST RESOLUTION SHELL. \n',
240
'REMARK 200 HIGHEST RESOLUTION SHELL, RANGE HIGH (A) : 1.70 \n',
241
'REMARK 200 HIGHEST RESOLUTION SHELL, RANGE LOW (A) : NULL \n',
242
'REMARK 200 COMPLETENESS FOR SHELL (%) : 97.5 \n',
243
'REMARK 200 DATA REDUNDANCY IN SHELL : 4.50 \n',
244
'REMARK 200 R MERGE FOR SHELL (I) : 0.06000 \n',
245
'REMARK 200 R SYM FOR SHELL (I) : 0.24000 \n',
246
'REMARK 200 <I/SIGMA(I)> FOR SHELL : 7.900 \n',
248
'REMARK 200 DIFFRACTION PROTOCOL: MAD \n',
249
'REMARK 200 METHOD USED TO DETERMINE THE STRUCTURE: MAD \n',
250
'REMARK 200 SOFTWARE USED: AMORE \n',
251
'REMARK 200 STARTING MODEL: NULL \n',
253
'REMARK 200 REMARK: NULL \n',
255
'REMARK 280 CRYSTAL \n',
256
'REMARK 280 SOLVENT CONTENT, VS (%): 46.26 \n',
257
'REMARK 280 MATTHEWS COEFFICIENT, VM (ANGSTROMS**3/DA): 2.29 \n',
259
'REMARK 280 CRYSTALLIZATION CONDITIONS: PH 8.0, VAPOR DIFFUSION, SITTING \n',
260
'REMARK 280 DROP, TEMPERATURE 298K \n',
261
'REMARK 290 REMARK: NULL \n',
263
'REMARK 300 BIOMOLECULE: 1 \n',
264
'REMARK 300 SEE REMARK 350 FOR THE AUTHOR PROVIDED AND/OR PROGRAM \n',
265
'REMARK 300 GENERATED ASSEMBLY INFORMATION FOR THE STRUCTURE IN \n',
266
'REMARK 300 THIS ENTRY. THE REMARK MAY ALSO PROVIDE INFORMATION ON \n',
267
'REMARK 300 BURIED SURFACE AREA. \n',
269
'REMARK 465 MISSING RESIDUES \n',
270
'REMARK 465 THE FOLLOWING RESIDUES WERE NOT LOCATED IN THE \n',
271
'REMARK 465 EXPERIMENT. (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN \n',
272
'REMARK 465 IDENTIFIER; SSSEQ=SEQUENCE NUMBER; I=INSERTION CODE.) \n',
274
'REMARK 465 M RES C SSSEQI \n',
275
'REMARK 465 LEU A 94 \n',
276
'REMARK 465 GLY A 95 \n',
277
'REMARK 465 ALA A 96 \n',
278
'REMARK 465 PRO A 97 \n',
279
'REMARK 465 GLN A 98 \n',
280
'REMARK 465 VAL A 99 \n',
281
'REMARK 465 MET A 100 \n',
282
'REMARK 465 PRO A 101 \n',
283
'REMARK 465 LEU B 94 \n',
284
'REMARK 465 GLY B 95 \n',
285
'REMARK 465 ALA B 96 \n',
286
'REMARK 465 PRO B 97 \n',
287
'REMARK 465 GLN B 98 \n',
288
'REMARK 465 VAL B 99 \n',
289
'REMARK 465 MET B 100 \n',
290
'REMARK 465 PRO B 101 \n',
292
'REMARK 500 GEOMETRY AND STEREOCHEMISTRY \n',
293
'REMARK 500 SUBTOPIC: CLOSE CONTACTS IN SAME ASYMMETRIC UNIT \n',
295
'REMARK 500 THE FOLLOWING ATOMS ARE IN CLOSE CONTACT. \n',
297
'REMARK 500 ATM1 RES C SSEQI ATM2 RES C SSEQI DISTANCE \n',
298
'REMARK 500 O HOH A 127 O HOH A 149 2.05 \n',
300
'REMARK 500 REMARK: NULL \n',
302
'REMARK 500 GEOMETRY AND STEREOCHEMISTRY \n',
303
'REMARK 500 SUBTOPIC: TORSION ANGLES \n',
305
'REMARK 500 TORSION ANGLES OUTSIDE THE EXPECTED RAMACHANDRAN REGIONS: \n',
306
'REMARK 500 (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN IDENTIFIER; \n',
307
'REMARK 500 SSEQ=SEQUENCE NUMBER; I=INSERTION CODE). \n',
309
'REMARK 500 STANDARD TABLE: \n',
310
'REMARK 500 FORMAT:(10X,I3,1X,A3,1X,A1,I4,A1,4X,F7.2,3X,F7.2) \n',
312
'REMARK 500 EXPECTED VALUES: GJ KLEYWEGT AND TA JONES (1996). PHI/PSI- \n',
313
'REMARK 500 CHOLOGY: RAMACHANDRAN REVISITED. STRUCTURE 4, 1395 - 1400 \n',
315
'REMARK 500 M RES CSSEQI PSI PHI \n',
316
'REMARK 500 ASN A 64 -175.84 -178.56 \n',
317
'REMARK 500 HIS A 71 -156.72 -164.33 \n',
318
'REMARK 500 LEU A 72 -70.52 -135.73 \n',
319
'REMARK 500 ALA A 74 -75.47 -29.45 \n',
320
'REMARK 500 SER A 75 -5.54 -145.62 \n',
321
'REMARK 500 GLN A 76 -178.36 65.32 \n',
322
'REMARK 500 GLU A 77 115.33 61.52 \n',
323
'REMARK 500 MET A 92 -36.89 93.21 \n',
324
'REMARK 500 LEU B 25 37.31 -77.89 \n',
325
'REMARK 500 GLN B 28 37.09 32.85 \n',
326
'REMARK 500 ARG B 30 132.20 -36.84 \n',
327
'REMARK 500 ASN B 64 -172.78 -175.93 \n',
328
'REMARK 500 GLN B 76 67.64 34.46 \n',
329
'REMARK 500 PRO B 91 -156.99 -48.61 \n',
330
'REMARK 500 MET B 92 -37.52 -160.44 \n',
332
'REMARK 500 REMARK: NULL \n',
334
'REMARK 525 SOLVENT \n',
336
'REMARK 525 THE SOLVENT MOLECULES HAVE CHAIN IDENTIFIERS THAT \n',
337
'REMARK 525 INDICATE THE POLYMER CHAIN WITH WHICH THEY ARE MOST \n',
338
'REMARK 525 CLOSELY ASSOCIATED. THE REMARK LISTS ALL THE SOLVENT \n',
339
'REMARK 525 MOLECULES WHICH ARE MORE THAN 5A AWAY FROM THE \n',
340
'REMARK 525 NEAREST POLYMER CHAIN (M = MODEL NUMBER; \n',
341
'REMARK 525 RES=RESIDUE NAME; C=CHAIN IDENTIFIER; SSEQ=SEQUENCE \n',
342
'REMARK 525 NUMBER; I=INSERTION CODE): \n',
344
'REMARK 525 M RES CSSEQI \n',
345
'REMARK 525 HOH B 115 DISTANCE = 6.82 ANGSTROMS \n',
346
'REMARK 525 HOH A 116 DISTANCE = 6.52 ANGSTROMS \n',
347
'REMARK 525 HOH B 119 DISTANCE = 5.12 ANGSTROMS \n',
348
'REMARK 525 HOH B 121 DISTANCE = 5.21 ANGSTROMS \n',
349
'REMARK 525 HOH B 123 DISTANCE = 5.18 ANGSTROMS \n',
350
'REMARK 525 HOH A 124 DISTANCE = 6.99 ANGSTROMS \n',
351
'REMARK 525 HOH B 124 DISTANCE = 5.13 ANGSTROMS \n',
352
'REMARK 525 HOH B 134 DISTANCE = 7.25 ANGSTROMS \n',
353
'REMARK 525 HOH B 140 DISTANCE = 5.54 ANGSTROMS \n',
354
'REMARK 525 HOH B 141 DISTANCE = 5.94 ANGSTROMS \n',
355
'REMARK 525 HOH B 142 DISTANCE = 6.60 ANGSTROMS \n',
356
'REMARK 525 HOH B 143 DISTANCE = 7.39 ANGSTROMS \n',
357
'REMARK 525 HOH A 145 DISTANCE = 9.25 ANGSTROMS \n',
358
'REMARK 525 HOH A 150 DISTANCE = 6.01 ANGSTROMS \n',
359
'REMARK 525 HOH B 152 DISTANCE = 5.46 ANGSTROMS \n',
360
'REMARK 525 HOH B 153 DISTANCE = 9.74 ANGSTROMS \n',
361
'REMARK 525 HOH B 154 DISTANCE = 9.32 ANGSTROMS \n',
362
'REMARK 525 HOH B 155 DISTANCE = 5.41 ANGSTROMS \n',
363
'REMARK 525 HOH B 163 DISTANCE = 5.16 ANGSTROMS \n',
364
'DBREF 2E12 A 1 101 UNP Q8P4R5 Q8P4R5_XANCP 1 101 \n',
365
'DBREF 2E12 B 1 101 UNP Q8P4R5 Q8P4R5_XANCP 1 101 \n',
366
'SEQRES 1 A 101 MET PRO LYS TYR ALA PRO HIS VAL TYR THR GLU GLN ALA \n',
367
'SEQRES 2 A 101 GLN ILE ALA THR LEU GLU HIS TRP VAL LYS LEU LEU ASP \n',
368
'SEQRES 3 A 101 GLY GLN GLU ARG VAL ARG ILE GLU LEU ASP ASP GLY SER \n',
369
'SEQRES 4 A 101 MET ILE ALA GLY THR VAL ALA VAL ARG PRO THR ILE GLN \n',
370
'SEQRES 5 A 101 THR TYR ARG ASP GLU GLN GLU ARG GLU GLY SER ASN GLY \n',
371
'SEQRES 6 A 101 GLN LEU ARG ILE ASP HIS LEU ASP ALA SER GLN GLU PRO \n',
372
'SEQRES 7 A 101 GLN TRP ILE TRP MET ASP ARG ILE VAL ALA VAL HIS PRO \n',
373
'SEQRES 8 A 101 MET PRO LEU GLY ALA PRO GLN VAL MET PRO \n',
374
'SEQRES 1 B 101 MET PRO LYS TYR ALA PRO HIS VAL TYR THR GLU GLN ALA \n',
375
'SEQRES 2 B 101 GLN ILE ALA THR LEU GLU HIS TRP VAL LYS LEU LEU ASP \n',
376
'SEQRES 3 B 101 GLY GLN GLU ARG VAL ARG ILE GLU LEU ASP ASP GLY SER \n',
377
'SEQRES 4 B 101 MET ILE ALA GLY THR VAL ALA VAL ARG PRO THR ILE GLN \n',
378
'SEQRES 5 B 101 THR TYR ARG ASP GLU GLN GLU ARG GLU GLY SER ASN GLY \n',
379
'SEQRES 6 B 101 GLN LEU ARG ILE ASP HIS LEU ASP ALA SER GLN GLU PRO \n',
380
'SEQRES 7 B 101 GLN TRP ILE TRP MET ASP ARG ILE VAL ALA VAL HIS PRO \n',
381
'SEQRES 8 B 101 MET PRO LEU GLY ALA PRO GLN VAL MET PRO \n',
382
'FORMUL 3 HOH *122(H2 O) \n',
383
'HELIX 1 1 GLU A 11 LEU A 24 1 14 \n',
384
'HELIX 2 2 GLU B 11 LEU B 25 1 15 \n',
385
'SHEET 1 A 3 ILE A 51 ARG A 55 0 \n',
386
'SHEET 2 A 3 GLU A 61 ASP A 70 -1 O ASN A 64 N GLN A 52 \n',
387
'SHEET 3 A 3 GLN A 79 TRP A 82 -1 O ILE A 81 N LEU A 67 \n',
388
'SHEET 1 B 5 ILE A 51 ARG A 55 0 \n',
389
'SHEET 2 B 5 GLU A 61 ASP A 70 -1 O ASN A 64 N GLN A 52 \n',
390
'SHEET 3 B 5 MET A 40 VAL A 45 -1 N THR A 44 O ASP A 70 \n',
391
'SHEET 4 B 5 ARG A 30 LEU A 35 -1 N ILE A 33 O ILE A 41 \n',
392
'SHEET 5 B 5 ILE A 86 PRO A 91 -1 O VAL A 87 N GLU A 34 \n',
393
'SHEET 1 C 5 PRO B 78 TRP B 82 0 \n',
394
'SHEET 2 C 5 GLN B 66 ASP B 70 -1 N ILE B 69 O GLN B 79 \n',
395
'SHEET 3 C 5 MET B 40 VAL B 47 -1 N ALA B 46 O ARG B 68 \n',
396
'SHEET 4 C 5 VAL B 31 LEU B 35 -1 N ILE B 33 O ILE B 41 \n',
397
'SHEET 5 C 5 ILE B 86 HIS B 90 -1 O VAL B 87 N GLU B 34 \n',
398
'SHEET 1 D 2 GLN B 52 ARG B 55 0 \n',
399
'SHEET 2 D 2 GLU B 61 ASN B 64 -1 O ASN B 64 N GLN B 52 \n',
400
'CRYST1 49.942 51.699 82.120 90.00 90.00 90.00 P 21 21 21 8 \n']
403
'bio_cmx': [[[('A',), ('B',)], 1]],
404
'uc_mxs': array([[[ 1. , 0. , 0. , 0. ],\
405
[ 0. , 1. , 0. , 0. ],\
406
[ 0. , 0. , 1. , 0. ],\
407
[ 0. , 0. , 0. , 1. ]],\
409
[[ -1. , 0. , 0. , 24.971 ],\
410
[ 0. , -1. , 0. , 0. ],\
411
[ 0. , 0. , 1. , 41.06 ],\
412
[ 0. , 0. , 0. , 1. ]],\
414
[[ -1. , 0. , 0. , 0. ],\
415
[ 0. , 1. , 0. , 25.8495],\
416
[ 0. , 0. , -1. , 41.06 ],\
417
[ 0. , 0. , 0. , 1. ]],\
419
[[ 1. , 0. , 0. , 24.971 ],\
420
[ 0. , -1. , 0. , 25.8495],\
421
[ 0. , 0. , -1. , 0. ],\
422
[ 0. , 0. , 0. , 1. ]]]), \
423
'dbref_acc_full': 'Q8P4R5_XANCP', \
424
'name': 'TRANSLATION', \
425
'solvent_content': '46.26', \
427
'bio_mxs': array([[[ 1., 0., 0., 0.],\
430
[ 0., 0., 0., 1.]]]),
431
'uc_omx': array([[ 49.94256605, 0. , 0. ],\
432
[ 0. , 51.69828879, 0. ],\
433
[ 0. , 0. , 82.12203334]]), \
434
'space_group': 'P 21 21 21', 'r_free': '0.280', \
435
'cryst1': '49.942 51.699 82.120 90.00 90.00 90.00', \
436
'experiment_type': 'X-RAY DIFFRACTION', \
437
'uc_fmx': array([[ 0.020023, 0. , 0. ],\
438
[ 0. , 0.019343, 0. ],\
439
[ 0. , 0. , 0.012177]]),\
440
'date': '17-OCT-06', \
441
'matthews': '2.29', \
442
'resolution': '1.70', \
444
'dbref_acc': 'Q8P4R5'}
446
parsed_header = parse_header(header)
447
for key, val in parsed_header.items():
448
assert val == correct_header[key]
450
def test_get_trailer_offset(self):
    """get_trailer_offset gives 1 for an 'ATOM' entry followed by 'CONNECT'."""
    records = ['ATOM', 'CONNECT']
    offset = get_trailer_offset(records)
    assert offset == 1
454
def test_get_coords_offset(self):
    """get_coords_offset gives 1 when 'ATOM' is the second entry."""
    records = ['dummy', 'ATOM', 'CONNECT']
    offset = get_coords_offset(records)
    assert offset == 1
458
def test_get_symmetry(self):
459
"""testing parsing of symmetry operators
462
'REMARK 290 SMTRY1 1 1.000000 0.000000 0.000000 0.00000 \n',
463
'REMARK 290 SMTRY2 1 0.000000 1.000000 0.000000 0.00000 \n',
464
'REMARK 290 SMTRY3 1 0.000000 0.000000 1.000000 0.00000 \n',
465
'REMARK 290 SMTRY1 2 -1.000000 0.000000 0.000000 24.97100 \n',
466
'REMARK 290 SMTRY2 2 0.000000 -1.000000 0.000000 0.00000 \n',
467
'REMARK 290 SMTRY3 2 0.000000 0.000000 1.000000 41.06000 \n',
468
'REMARK 290 SMTRY1 3 -1.000000 0.000000 0.000000 0.00000 \n',
469
'REMARK 290 SMTRY2 3 0.000000 1.000000 0.000000 25.84950 \n',
470
'REMARK 290 SMTRY3 3 0.000000 0.000000 -1.000000 41.06000 \n',
471
'REMARK 290 SMTRY1 4 1.000000 0.000000 0.000000 24.97100 \n',
472
'REMARK 290 SMTRY2 4 0.000000 -1.000000 0.000000 25.84950 \n',
473
'REMARK 290 SMTRY3 4 0.000000 0.000000 -1.000000 0.00000 \n',
475
'REMARK 290 REMARK: NULL \n',
477
'REMARK 350 COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN \n',
478
'REMARK 350 BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE \n',
479
'REMARK 350 MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS \n',
480
'REMARK 350 GIVEN BELOW. BOTH NON-CRYSTALLOGRAPHIC AND \n',
481
'REMARK 350 CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN. \n',
483
'REMARK 350 BIOMOLECULE: 1 \n',
484
'REMARK 350 AUTHOR DETERMINED BIOLOGICAL UNIT: DIMERIC \n',
485
'REMARK 350 APPLY THE FOLLOWING TO CHAINS: A, B \n',
486
'REMARK 350 BIOMT1 1 1.000000 0.000000 0.000000 0.00000 \n',
487
'REMARK 350 BIOMT2 1 0.000000 1.000000 0.000000 0.00000 \n',
488
'REMARK 350 BIOMT3 1 0.000000 0.000000 1.000000 0.00000 \n',
489
'CRYST1 49.942 51.699 82.120 90.00 90.00 90.00 P 21 21 21 8 \n',
490
'ORIGX1 1.000000 0.000000 0.000000 0.00000 \n',
491
'ORIGX2 0.000000 1.000000 0.000000 0.00000 \n',
492
'ORIGX3 0.000000 0.000000 1.000000 0.00000 \n',
493
'SCALE1 0.020023 0.000000 0.000000 0.00000 \n',
494
'SCALE2 0.000000 0.019343 0.000000 0.00000 \n',
495
'SCALE3 0.000000 0.000000 0.012177 0.00000 \n']
496
sym = get_symmetry(lines)
498
'bio_cmx': [[[('A',), ('B',)], 1]],
499
'uc_mxs': array([[[ 1. , 0. , 0. , 0. ],\
500
[ 0. , 1. , 0. , 0. ],\
501
[ 0. , 0. , 1. , 0. ],\
502
[ 0. , 0. , 0. , 1. ]],\
504
[[ -1. , 0. , 0. , 24.971 ],\
505
[ 0. , -1. , 0. , 0. ],\
506
[ 0. , 0. , 1. , 41.06 ],\
507
[ 0. , 0. , 0. , 1. ]],\
509
[[ -1. , 0. , 0. , 0. ],\
510
[ 0. , 1. , 0. , 25.8495],\
511
[ 0. , 0. , -1. , 41.06 ],\
512
[ 0. , 0. , 0. , 1. ]],\
514
[[ 1. , 0. , 0. , 24.971 ],\
515
[ 0. , -1. , 0. , 25.8495],\
516
[ 0. , 0. , -1. , 0. ],\
517
[ 0. , 0. , 0. , 1. ]]]), \
518
'bio_mxs': array([[[ 1., 0., 0., 0.],\
521
[ 0., 0., 0., 1.]]]),
522
'uc_omx': array([[ 49.94256605, 0. , 0. ],\
523
[ 0. , 51.69828879, 0. ],\
524
[ 0. , 0. , 82.12203334]]), \
525
'uc_fmx': array([[ 0.020023, 0. , 0. ],\
526
[ 0. , 0.019343, 0. ],\
527
[ 0. , 0. , 0.012177]]),}
531
assert sym[key] == correct_sym[key]
533
assert allclose(sym[key], correct_sym[key])
535
def test_dict2pdb(self):
536
"""testing pdb dict round-trip.
538
line = 'ATOM 1 N MET A 1 53.045 42.225 33.724 1.00 2.75 N\n'
543
assert d == {'ser_num': 1, 'res_long_id': ('MET', 1, ' '),
546
'at_long_id': ('N', ' '),
547
'bfactor': 2.75, 'chain_id': 'A',
548
'occupancy': 1.0, 'element': ' N',
550
'seg_id': ' ', 'at_id': 'N',
556
def test_dict2ter(self):
557
d = {'ser_num': 1, 'chain_id': 'A', 'res_name': 'MET', 'res_ic': ' ', \
559
assert dict2ter(d) == 'TER 2 MET A 1 \n'
560
if __name__ == '__main__':