1
/***************************************************************************
2
* Copyright (C) 2005 by Roberto Virga *
3
* rvirga@users.sf.net *
5
* This program is free software; you can redistribute it and/or modify *
6
* it under the terms of the GNU General Public License as published by *
7
* the Free Software Foundation; either version 2 of the License, or *
8
* (at your option) any later version. *
10
* This program is distributed in the hope that it will be useful, *
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13
* GNU General Public License for more details. *
15
* You should have received a copy of the GNU General Public License *
16
* along with this program; if not, write to the *
17
* Free Software Foundation, Inc., *
18
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19
***************************************************************************/
23
#include "predictordata.h"
25
// Three-letter residue codes. parseAminoAcid() converts the index of a
// matching entry into a PredictorAminoAcid value, so the order of this table
// has to agree with the values of that type.
const QString PredictorAminoAcidName[] =
26
{"GLY", "ALA", "SER", "CYS", "VAL", "THR", "ILE", "PRO", "MET", "ASP",
27
"ASN", "LEU", "LYS", "GLU", "GLN", "ARG", "HIS", "PHE", "TYR", "TRP",
30
// One-letter residue codes, listed in the same order as
// PredictorAminoAcidName. The toString() methods index this table with a
// residue's resName.
const char PredictorAminoAcidAbbrev[] =
31
{'G', 'A', 'S', 'C', 'V', 'T', 'I', 'P', 'M', 'D',
32
'N', 'L', 'K', 'E', 'Q', 'R', 'H', 'F', 'Y', 'W',
35
// Per-residue atom counts, indexed by residue; PredictorMonssterSeq::parse
// sums these into its atoms total.
// NOTE(review): the values look like heavy-atom (non-hydrogen) counts, and
// the trailing 0 presumably belongs to the "unknown residue" index
// AminoAcids - confirm against the header.
const unsigned PredictorAminoAcidAtoms[] =
36
{4, 5, 6, 6, 7, 7, 8, 7, 8, 8, 8, 8, 9, 9, 9, 11, 10, 11, 12, 14, 0};
39
// Per-residue masses, listed in the same order as PredictorAminoAcidName.
// NOTE(review): the values look like molecular weights in g/mol - confirm the
// unit before relying on it.
const double PredictorAminoAcidMass[] =
40
{ 75.07, 89.09, 105.09, 121.16, 117.15, 119.12, 131.17, 115.13, 149.21, 133.10,
41
132.12, 131.17, 146.19, 147.13, 146.15, 174.20, 155.16, 165.19, 181.19, 204.23,
45
bool parseAminoAcid(const QString &name, PredictorAminoAcid &aa)
47
for(unsigned i = 0; i < AminoAcids; ++i)
48
if(PredictorAminoAcidName[i] == name) {
49
aa = PredictorAminoAcid(i);
56
// Splits `text` on single spaces and converts each token to an unsigned
// integer (base 10), preserving token order.
//
// The conversion status is not requested (first argument of toUInt is 0),
// so a token that is not a number is not reported as an error.
QValueList<uint> parseUIntList(const QString &text)
{
  QStringList list = QStringList::split(" ", text);
  QValueList<uint> out;

  for(QStringList::const_iterator it = list.constBegin(); list.constEnd() != it; ++it)
    out << (*it).toUInt(0, 10);

  return out;
}
67
// Splits `text` on single spaces and converts each token to a double,
// preserving token order.
//
// The conversion status of toDouble() is not checked, so a token that is not
// a number is not reported as an error.
QValueList<double> parseDoubleList(const QString &text)
{
  QStringList list = QStringList::split(" ", text);
  QValueList<double> out;

  for(QStringList::const_iterator it = list.constBegin(); list.constEnd() != it; ++it)
    out << (*it).toDouble();

  return out;
}
78
// Parses three burial tables from `lines` into avg, sdev and pairs.
//
// Each table is introduced by a header line that must contain, in this order,
// "Average percentage of burial", "Average standard deviation of burial" and
// "Number of pairs used". Every table row is read from the text following the
// first four characters of its line and must hold exactly AminoAcids+1 values.
//
// Returns false when a header is missing, when the input ends early, or when
// a row has the wrong number of values.
bool PredictorBurials::parse(const QStringList &lines)
80
QStringList::const_iterator line = lines.constBegin();
82
// First table: averages (doubles).
if(lines.constEnd() == line
83
|| !(*line).contains("Average percentage of burial")) return false;
87
if(lines.constEnd() == line) return false;
91
for(unsigned row = 0; row <= AminoAcids; ++row)
93
if(lines.constEnd() == line) return false;
95
QValueList<double> values = parseDoubleList((*line).mid(4));
96
if(values.count() != AminoAcids+1) return false;
98
for(unsigned column = 0; column <= AminoAcids; ++column)
99
avg[row][column] = values[column];
104
// Second table: standard deviations (doubles).
if(lines.constEnd() == line
105
|| !(*line).contains("Average standard deviation of burial")) return false;
109
if(lines.constEnd() == line) return false;
113
for(unsigned row = 0; row <= AminoAcids; ++row)
115
if(lines.constEnd() == line) return false;
117
QValueList<double> values = parseDoubleList((*line).mid(4));
118
if(values.count() != AminoAcids+1) return false;
120
for(unsigned column = 0; column <= AminoAcids; ++column)
121
sdev[row][column] = values[column];
126
// Third table: pair counts (unsigned integers).
if(lines.constEnd() == line
127
|| !(*line).contains("Number of pairs used")) return false;
128
++line; if(lines.constEnd() == line) return false;
131
++line; if(lines.constEnd() == line) return false;
134
for(unsigned row = 0; row <= AminoAcids; ++row)
136
QValueList<unsigned> values = parseUIntList((*line).mid(4));
137
if(values.count() != AminoAcids+1) return false;
139
for(unsigned column = 0; column <= AminoAcids; ++column)
140
pairs[row][column] = values[column];
142
++line; if(lines.constEnd() == line) return false;
148
// Parses a table of AminoAcids rows with 25 doubles each into value[row][].
//
// Each row is read from the text following the first four characters of its
// line. Returns false when the input ends early or a row does not hold
// exactly 25 values.
bool PredictorECovers24::parse(const QStringList &lines)
150
QStringList::const_iterator line = lines.constBegin();
153
if(lines.constEnd() == line) return false;
157
for(unsigned row = 0; row < AminoAcids; ++row)
159
if(lines.constEnd() == line) return false;
161
QValueList<double> values = parseDoubleList((*line).mid(4));
162
if(values.count() != 25) return false;
164
for(unsigned column = 0; column < 25; ++column)
165
value[row][column] = values[column];
173
// Parses, for every amino acid, 5 sets of 5 rows with 5 doubles each into
// value[aa][set][row][0..4].
//
// Returns false when the input ends before all rows were read. The number of
// fields actually converted by sscanf is not checked.
bool PredictorProfile3::parse(const QStringList &lines)
175
QStringList::const_iterator line = lines.constBegin();
177
for(unsigned aa = 0; aa < AminoAcids; ++aa)
180
if(lines.constEnd() == line) return false;
183
for(unsigned set = 0; set < 5; ++set)
184
for(unsigned row = 0; row < 5; ++row)
186
if(lines.constEnd() == line) return false;
187
// The QString is handed to sscanf directly, relying on its implicit
// conversion to a C string.
sscanf(*line, "%lf %lf %lf %lf %lf",
188
&value[aa][set][row][0],
189
&value[aa][set][row][1],
190
&value[aa][set][row][2],
191
&value[aa][set][row][3],
192
&value[aa][set][row][4]);
200
// Scans `lines` for sections whose first line starts with "PAR", "MID" or
// "ANT" and reads an AminoAcids x AminoAcids table of doubles for each one
// into par[npar], mid[nmid] or ant[nant] respectively.
//
// Each table row is read from the text following the first four characters
// of its line. Returns false when the input ends inside a table or a row does
// not hold exactly AminoAcids values.
// NOTE(review): npar, nmid and nant start at 0; where they are incremented is
// not visible here - confirm they are bounded by the array sizes.
bool PredictorQuasi3::parse(const QStringList &lines)
202
unsigned npar = 0, nmid = 0, nant = 0;
203
QStringList::const_iterator line = lines.constBegin();
205
while(lines.constEnd() != line)
207
if((*line).startsWith("PAR")) {
210
for(unsigned row = 0; row < AminoAcids; ++row)
212
if(lines.constEnd() == line) return false;
214
QValueList<double> values = parseDoubleList((*line).mid(4));
215
if(values.count() != AminoAcids) return false;
217
for(unsigned column = 0; column < AminoAcids; ++column)
218
par[npar][row][column] = values[column];
225
else if((*line).startsWith("MID"))
229
for(unsigned row = 0; row < AminoAcids; ++row)
231
if(lines.constEnd() == line) return false;
233
QValueList<double> values = parseDoubleList((*line).mid(4));
234
if(values.count() != AminoAcids) return false;
236
for(unsigned column = 0; column < AminoAcids; ++column)
237
mid[nmid][row][column] = values[column];
244
else if((*line).startsWith("ANT"))
248
for(unsigned row = 0; row < AminoAcids; ++row)
250
if(lines.constEnd() == line) return false;
252
QValueList<double> values = parseDoubleList((*line).mid(4));
253
if(values.count() != AminoAcids) return false;
255
for(unsigned column = 0; column < AminoAcids; ++column)
256
ant[nant][row][column] = values[column];
270
// Parses one record consisting of three residue codes followed by numbers.
//
// The residue codes are taken from columns 0-2, 4-6 and 8-10 and stored in
// aa[0..2]; an unknown code makes the function return false. The remainder of
// the line, from column 12 on, is scanned as three unsigned counts followed
// by one double.
bool PredictorScale3B::parse(const QString &line)
272
if(!parseAminoAcid(line.mid(0, 3), aa[0])) return false;
273
if(!parseAminoAcid(line.mid(4, 3), aa[1])) return false;
274
if(!parseAminoAcid(line.mid(8, 3), aa[2])) return false;
275
sscanf(line.mid(12), "%u %u %u %lf",
276
&count[0], &count[1], &count[2],
282
// Scans `lines` for the sections "##### R1.2", "##### R1.3", "##### R1.4" and
// "##### R1.5" and reads one entry per (row, column) pair of amino acids.
//
// An entry holds 3 doubles for r1_2, 4 for r1_3, 14 for r1_4 and 7 for r1_5.
// Returns false when the input ends inside a section. The number of fields
// actually converted by sscanf is not checked.
bool PredictorS1234::parse(const QStringList &lines)
284
QStringList::const_iterator line = lines.constBegin();
286
while(lines.constEnd() != line)
288
if((*line).startsWith("##### R1.2")) {
291
for(unsigned row = 0; row < AminoAcids; ++row)
292
for(unsigned column = 0; column < AminoAcids; ++column)
295
if(lines.constEnd() == line) return false;
298
if(lines.constEnd() == line) return false;
299
sscanf(*line, "%lf %lf %lf",
300
&r1_2[row][column][0],
301
&r1_2[row][column][1],
302
&r1_2[row][column][2]);
306
} else if((*line).startsWith("##### R1.3")) {
309
for(unsigned row = 0; row < AminoAcids; ++row)
310
for(unsigned column = 0; column < AminoAcids; ++column)
313
if(lines.constEnd() == line) return false;
316
if(lines.constEnd() == line) return false;
317
sscanf(*line, "%lf %lf %lf %lf",
318
&r1_3[row][column][0],
319
&r1_3[row][column][1],
320
&r1_3[row][column][2],
321
&r1_3[row][column][3]);
325
} else if((*line).startsWith("##### R1.4")) {
328
for(unsigned row = 0; row < AminoAcids; ++row)
329
for(unsigned column = 0; column < AminoAcids; ++column)
332
if(lines.constEnd() == line) return false;
335
if(lines.constEnd() == line) return false;
336
sscanf(*line, "%lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
337
&r1_4[row][column][0],
338
&r1_4[row][column][1],
339
&r1_4[row][column][2],
340
&r1_4[row][column][3],
341
&r1_4[row][column][4],
342
&r1_4[row][column][5],
343
&r1_4[row][column][6],
344
&r1_4[row][column][7],
345
&r1_4[row][column][8],
346
&r1_4[row][column][9],
347
&r1_4[row][column][10],
348
&r1_4[row][column][11],
349
&r1_4[row][column][12],
350
&r1_4[row][column][13]);
354
} else if((*line).startsWith("##### R1.5")) {
357
for(unsigned row = 0; row < AminoAcids; ++row)
358
for(unsigned column = 0; column < AminoAcids; ++column)
361
if(lines.constEnd() == line) return false;
364
if(lines.constEnd() == line) return false;
365
sscanf(*line, "%lf %lf %lf %lf %lf %lf %lf",
366
&r1_5[row][column][0],
367
&r1_5[row][column][1],
368
&r1_5[row][column][2],
369
&r1_5[row][column][3],
370
&r1_5[row][column][4],
371
&r1_5[row][column][5],
372
&r1_5[row][column][6]);
383
bool PredictorMonssterAtom::parse(const QString &line)
385
sscanf(line, "%u %u %u", &x, &y, &z);
390
bool PredictorMonssterInput::parse(const QStringList &lines)
392
QStringList::const_iterator line = lines.constBegin();
394
if(lines.constEnd() == line) return false;
395
sscanf(*line, "%u %u %u %u", &random, &ncycle, &icycle, &tsteps);
398
if(lines.constEnd() == line) return false;
399
sscanf(*line, "%u %u", &resmin, &resmax);
402
if(lines.constEnd() == line) return false;
405
if(lines.constEnd() == line) return false;
406
sscanf(*line, "%lf %lf %lf %lf", &temp[0], &temp[1], &softcore, ¢ral);
409
if(lines.constEnd() == line) return false;
410
sscanf(*line, "%lf %lf %lf %lf %lf", &stiff, &pair, &kdcore, &hbond, &shrt);
413
if(lines.constEnd() == line) return false;
414
sscanf(*line, "%lf %lf %lf", &burial, &multibody, &threebody);
420
bool PredictorMonssterResidue::parse(const QString &line)
422
resSeq = line.left(5).toUInt(0, 10);
423
if(!parseAminoAcid(line.mid(8, 3), resName)) return false;
424
sscanf(line.mid(12), "%u %u", &count[0], &count[1]);
429
// Parses every line as a PredictorMonssterResidue and adds the atom count of
// each residue (from PredictorAminoAcidAtoms) to atoms.
//
// Returns false as soon as one line fails to parse.
bool PredictorMonssterSeq::parse(const QStringList &lines)
433
QStringList::const_iterator line = lines.constBegin();
435
while(lines.constEnd() != line)
437
PredictorMonssterResidue item;
438
if(!item.parse(*line)) return false;
440
atoms += PredictorAminoAcidAtoms[item.resName];
448
// Builds the sequence as one-letter residue codes, one per entry of groups.
// A newline is inserted whenever column is a non-zero multiple of 60.
QString PredictorMonssterSeq::toString() const
453
for(QValueList<PredictorMonssterResidue>::const_iterator group = groups.begin();
454
group != groups.end(); ++group)
456
if(column > 0 && (column % 60) == 0) out.append('\n');
457
out.append(PredictorAminoAcidAbbrev[(*group).resName]);
464
bool PredictorMonssterRestraint::parse(const QString &line)
466
sscanf(line, "%u %lf", &num, &value);
471
// Parses one fixed-column PDB ATOM record into this object.
//
// Fields are read by column position. A field that lies beyond the end of the
// line receives the default shown in the corresponding conditional
// expression (0, 0.0, a blank QChar or a null QString).
// NOTE(review): as written here, line.left(6) (six characters) is compared
// with the five-character literal "ATOM ", which can never be equal, so every
// record would be rejected. The PDB record-name field is six columns wide -
// confirm the literal has not lost a trailing blank.
bool PredictorAtomPDB::parse(const QString &line)
473
const unsigned len = line.length();
475
if(len < 6 || line.left(6) != "ATOM ") return false;
477
serial = (len > 6) ? line.mid(6, 5).toUInt(0, 10) : 0;
481
// Columns 12-15 hold the atom name: element symbol, remoteness indicator
// (column 14) and branch designator (column 15).
element = line.mid(12, 2).stripWhiteSpace();
482
if(element.startsWith("H")) element = "H";
485
// The remoteness is stored as the position of the column-14 character in
// this string.
const QString greek = " ABGDEZHT";
486
while(line.at(14) != greek.at(name.remoteness))
487
if(++name.remoteness >= greek.length())
490
name.branch = (line.at(15) == ' ') ? 0 : (line.at(15) - '1');
491
name.iupac = line.mid(12, 4).stripWhiteSpace();
495
element = name.iupac = QString::null;
496
name.remoteness = name.branch = 0;
499
altLoc = (len > 16) ? line.at(16) : QChar(' ');
502
if(!parseAminoAcid(line.mid(17, 3), resName)) return false;
504
resName = AminoAcids;
506
chainID = (len > 21) ? line.at(21) : QChar(' ');
508
resSeq = (len > 22) ? line.mid(22, 4).toUInt(0, 10) : 0;
510
iCode = (len > 26) ? line.at(26) : QChar(' ');
512
x = (len > 30) ? line.mid(30, 8).toDouble() : 0.0;
514
y = (len > 38) ? line.mid(38, 8).toDouble() : 0.0;
516
z = (len > 46) ? line.mid(46, 8).toDouble() : 0.0;
518
occupancy = (len > 54) ? line.mid(54, 6).toDouble() : 0.0;
520
tempFactor = (len > 60) ? line.mid(60, 6).toDouble() : 0.0;
522
segID = (len > 72) ? line.mid(72, 4).stripWhiteSpace() : QString::null;
524
// An explicit element field in columns 76-77 overrides the element derived
// from the atom name.
if(len > 76) element = line.mid(76, 2).stripWhiteSpace();
526
charge = (len > 78) ? line.mid(78, 2).stripWhiteSpace() : QString::null;
531
bool operator<(const PredictorAtomPDB &a1, const PredictorAtomPDB &a2)
533
return(a1.serial < a2.serial);
536
// Parses one fixed-column PDB HELIX record into this object.
//
// Returns false when the line does not start with "HELIX " or when a residue
// name passed to parseAminoAcid() is not recognised. A field that lies beyond
// the end of the line receives the default shown in the corresponding
// conditional expression.
bool PredictorHelixPDB::parse(const QString &line)
538
const unsigned len = line.length();
540
if(len < 6 || line.left(6) != "HELIX ") return false;
542
serNum = (len > 7) ? line.mid(7, 3).toUInt(0, 10) : 0;
544
helixID = (len > 11) ? line.mid(11, 3).stripWhiteSpace() : QString::null;
547
// Initial residue of the helix.
if(!parseAminoAcid(line.mid(15, 3), init.resName)) return false;
549
init.resName = AminoAcids;
551
init.chainID = (len > 19) ? line.at(19) : QChar(' ');
553
init.seqNum = (len > 21) ? line.mid(21, 4).toUInt(0, 10) : 0;
555
init.iCode = (len > 25) ? line.at(25) : QChar(' ');
558
// Terminal residue of the helix.
if(!parseAminoAcid(line.mid(27, 3), end.resName)) return false;
560
end.resName = AminoAcids;
562
end.chainID = (len > 31) ? line.at(31) : QChar(' ');
564
end.seqNum = (len > 33) ? line.mid(33, 4).toUInt(0, 10) : 0;
566
end.iCode = (len > 37) ? line.at(37) : QChar(' ');
568
helixClass = (len > 38) ? PredictorHelixClass(line.mid(38, 2).toUInt(0, 10)) : RightHandedApha;
570
comment = (len > 40) ? line.mid(40, 30).stripWhiteSpace() : QString::null;
572
length = (len > 71) ? line.mid(71, 5).toUInt(0, 10) : 0;
577
bool operator<(const PredictorHelixPDB &h1, const PredictorHelixPDB &h2)
579
return(h1.init.seqNum < h2.init.seqNum);
582
// Parses one fixed-column PDB SHEET record into this object.
//
// Besides the strand's initial and terminal residues, the record carries the
// sense value and two atom references (curr and prev). Returns false when the
// line does not start with "SHEET " or when a residue name passed to
// parseAminoAcid() is not recognised. A field that lies beyond the end of the
// line receives the default shown in the corresponding conditional
// expression.
bool PredictorSheetPDB::parse(const QString &line)
584
const unsigned len = line.length();
586
if(len < 6 || line.left(6) != "SHEET ") return false;
588
strand = (len > 7) ? line.mid(7, 3).toUInt(0, 10) : 0;
590
sheetID = (len > 11) ? line.mid(11, 3).stripWhiteSpace() : QString::null;
592
numStrands = (len > 14) ? line.mid(14, 2).toUInt(0, 10) : 0;
595
// Initial residue of the strand.
if(!parseAminoAcid(line.mid(17, 3), init.resName)) return false;
597
init.resName = AminoAcids;
599
init.chainID = (len > 21) ? line.at(21) : QChar(' ');
601
init.seqNum = (len > 22) ? line.mid(22, 4).toUInt(0, 10) : 0;
603
init.iCode = (len > 26) ? line.at(26) : QChar(' ');
606
// Terminal residue of the strand.
if(!parseAminoAcid(line.mid(28, 3), end.resName)) return false;
608
end.resName = AminoAcids;
610
end.chainID = (len > 32) ? line.at(32) : QChar(' ');
612
end.seqNum = (len > 33) ? line.mid(33, 4).toUInt(0, 10) : 0;
614
end.iCode = (len > 37) ? line.at(37) : QChar(' ');
616
sense = (len > 38) ? line.mid(38, 2).toInt(0, 10) : 0;
618
// Atom reference stored in curr.
curr.atom = (len > 41) ? line.mid(41, 4).stripWhiteSpace() : QString::null;
621
if(!parseAminoAcid(line.mid(45, 3), curr.resName)) return false;
623
curr.resName = AminoAcids;
625
curr.chainId = (len > 49) ? line.at(49) : QChar(' ');
627
curr.resSeq = (len > 50) ? line.mid(50, 4).toUInt(0, 10) : 0;
629
curr.iCode = (len > 54) ? line.at(54) : QChar(' ');
631
// Atom reference stored in prev.
prev.atom = (len > 56) ? line.mid(56, 4).stripWhiteSpace() : QString::null;
634
if(!parseAminoAcid(line.mid(60, 3), prev.resName)) return false;
636
prev.resName = AminoAcids;
638
prev.chainId = (len > 64) ? line.at(64) : QChar(' ');
640
prev.resSeq = (len > 65) ? line.mid(65, 4).toUInt(0, 10) : 0;
642
prev.iCode = (len > 69) ? line.at(69) : QChar(' ');
647
bool operator<(const PredictorSheetPDB &s1, const PredictorSheetPDB &s2)
649
return(s1.init.seqNum < s2.init.seqNum);
652
// Parses one fixed-column PDB TURN record into this object.
//
// A field that lies beyond the end of the line receives the default shown in
// the corresponding conditional expression.
// NOTE(review): as written here, line.left(6) (six characters) is compared
// with the five-character literal "TURN ", which can never be equal, so every
// record would be rejected. The PDB record-name field is six columns wide -
// confirm the literal has not lost a trailing blank.
bool PredictorTurnPDB::parse(const QString &line)
654
const unsigned len = line.length();
656
if(len < 6 || line.left(6) != "TURN ") return false;
658
seq = (len > 7) ? line.mid(7, 3).toUInt(0, 10) : 0;
660
turnID = (len > 11) ? line.mid(11, 3).stripWhiteSpace() : QString::null;
663
// Initial residue of the turn.
if(!parseAminoAcid(line.mid(15, 3), init.resName)) return false;
665
init.resName = AminoAcids;
667
init.chainID = (len > 19) ? line.at(19) : QChar(' ');
669
init.seqNum = (len > 20) ? line.mid(20, 4).toUInt(0, 10) : 0;
671
init.iCode = (len > 24) ? line.at(24) : QChar(' ');
674
// Terminal residue of the turn.
if(!parseAminoAcid(line.mid(26, 3), end.resName)) return false;
676
end.resName = AminoAcids;
678
end.chainID = (len > 30) ? line.at(30) : QChar(' ');
680
end.seqNum = (len > 31) ? line.mid(31, 4).toUInt(0, 10) : 0;
682
end.iCode = (len > 35) ? line.at(35) : QChar(' ');
684
// Unlike the HELIX comment, this one is stored without stripping whitespace.
comment = (len > 40) ? line.mid(40, 30) : QString::null;
689
bool operator<(const PredictorTurnPDB &t1, const PredictorTurnPDB &t2)
691
return(t1.init.seqNum < t2.init.seqNum);
694
// Parses a PDB file given as a list of lines, dispatching each line on its
// record prefix (ATOM, HELIX, SHEET, TURN, END).
//
// Returns false as soon as one ATOM, HELIX, SHEET or TURN record fails to
// parse. groups is incremented once for every atom whose IUPAC name is "CA".
bool PredictorProteinPDB::parse(const QStringList &lines)
702
bool hasSecondary = false;
703
for(QStringList::const_iterator line = lines.constBegin();
704
line != lines.constEnd(); ++line)
705
if((*line).startsWith("ATOM"))
707
PredictorAtomPDB item;
708
if(!item.parse(*line)) return false;
711
if(item.name.iupac == "CA") groups++;
713
else if((*line).startsWith("HELIX"))
715
PredictorHelixPDB item;
716
if(!item.parse(*line)) return false;
721
else if((*line).startsWith("SHEET"))
723
PredictorSheetPDB item;
724
if(!item.parse(*line)) return false;
729
else if((*line).startsWith("TURN"))
731
PredictorTurnPDB item;
732
if(!item.parse(*line)) return false;
737
else if((*line).startsWith("END"))
750
// Builds the sequence as one-letter residue codes, using only the atoms whose
// IUPAC name is "CA" (one per residue). A newline is inserted whenever column
// is a non-zero multiple of 60.
QString PredictorProteinPDB::toString() const
755
for(QValueList<PredictorAtomPDB>::const_iterator atom = atoms.begin();
756
atom != atoms.end(); ++atom)
758
if((*atom).name.iupac != "CA") continue;
759
if(column > 0 && (column % 60) == 0) out.append('\n');
760
out.append(PredictorAminoAcidAbbrev[(*atom).resName]);
767
bool PredictorProteinNOE::parse(const QString &line)
769
QStringList values = QStringList::split(" ", line);
770
if(values.count() != 21) return false;
772
select[0].index = values[4].toUInt(0, 10);
773
select[0].name = values[5];
775
select[1].index = values[10].toUInt(0, 10);
776
select[1].name = values[11];
778
kmin = values[14].toDouble();
779
rmin = values[16].toDouble();
780
kmax = values[18].toDouble();
781
rmax = values[20].toDouble();
786
// Extracts the settings ntemps, nsteps, thigh and tlow from lines of the form
// "set <name> = <value>". All four values are reset to 0 first.
//
// Lines whose first non-blank character is '!' are skipped, as are lines
// without a "set <name> = " match.
// NOTE(review): as visible here, key is taken from the start of the "set"
// match up to the '=', which would include the "set " prefix, while the
// comparisons below expect the bare name - confirm that start is advanced
// past "set " before key is extracted.
bool PredictorCharmmInp::parse(const QStringList &lines)
788
ntemps = nsteps = t.low = t.high = 0;
790
for(QStringList::const_iterator line = lines.begin(); lines.constEnd() != line; ++line)
792
if((*line).stripWhiteSpace().startsWith("!")) continue;
794
int start = (*line).find(QRegExp("set \\w+ = "));
795
if(start < 0) continue;
798
int end = (*line).find('=', start);
799
if(end < 0) continue;
801
const QString key = (*line).mid(start, end - start).stripWhiteSpace(),
802
value = (*line).mid(end+1).stripWhiteSpace();
805
ntemps = value.toUInt(0, 10);
806
else if(key == "nsteps")
807
nsteps = value.toUInt(0, 10);
808
else if(key == "thigh")
809
t.high = value.toUInt(0, 10);
810
else if(key == "tlow")
811
t.low = value.toUInt(0, 10);
817
bool PredictorMonssterRestart::parse(const QStringList &lines)
819
QStringList::const_iterator line = lines.constBegin();
821
if(lines.constEnd() == line) return false;
822
sscanf(*line, "%u %u %lf %lf %lf %lf %lf",
823
&line1a[0], &line1a[1],
824
&line1b[0], &line1b[1], &line1b[2], &line1b[3], &line1b[4]);
827
if(lines.constEnd() == line) return false;
828
sscanf(*line, "%u %u %u", &line2[0], &line2[1], &line2[2]);
831
if(lines.constEnd() == line) return false;
832
sscanf(*line, "%lf %lf", &line3[0], &line3[1]);
837
if(lines.constEnd() == line) return false;
838
sscanf(*line, "%u", &count);
842
for(unsigned i = 0; i < count; ++i)
844
PredictorMonssterAtom item;
846
if(lines.constEnd() == line || !item.parse(*line)) return false;
852
qDebug("...parse OK");