1
// -*- mode: C++; tab-width: 2; -*-
4
// --------------------------------------------------------------------------
5
// OpenMS Mass Spectrometry Framework
6
// --------------------------------------------------------------------------
7
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
9
// This library is free software; you can redistribute it and/or
10
// modify it under the terms of the GNU Lesser General Public
11
// License as published by the Free Software Foundation; either
12
// version 2.1 of the License, or (at your option) any later version.
14
// This library is distributed in the hope that it will be useful,
15
// but WITHOUT ANY WARRANTY; without even the implied warranty of
16
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
// Lesser General Public License for more details.
19
// You should have received a copy of the GNU Lesser General Public
20
// License along with this library; if not, write to the Free Software
21
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
// --------------------------------------------------------------------------
24
// $Maintainer: Alexandra Scherbart $
26
// --------------------------------------------------------------------------
28
#ifndef OPENMS_CHEMISTRY_AAINDEX_H
29
#define OPENMS_CHEMISTRY_AAINDEX_H
31
#include <OpenMS/CONCEPT/Exception.h>
32
#include <OpenMS/CONCEPT/Constants.h>
39
@brief Representation of selected %AAIndex properties
41
The literature describing the indices can be found in:
42
@n Kawashima, S., Ogata, H., and Kanehisa, M. (1999).
43
@n <em>AAindex: Amino Acid Index Database</em>,
44
@n Nucleic Acids Res, 27(1), 368–369.
46
The provided values are:
47
- GB500 Estimated gas-phase basicity at 500 K,
48
- VASM830103 Relative population of conformational state E,
49
- NADH010106 Hydropathy scale (36% accessibility),
50
- FAUJ880111 Positive charge,
51
- WILM950102 Hydrophobicity coefficient in RP-HPLC, C8 with 0.1%TFA/MeCN/H2O,
52
- OOBM850104 Optimized average non-bonded energy per atom,
53
- KHAG800101 The Kerr-constant increments,
54
- NADH010107 Hydropathy scale (50% accessibility),
55
- ROBB760107 Information measure for extended without H-bond,
56
- FINA770101 Helix-coil equilibrium constant,
57
- ARGP820102 Signal sequence helical potential.
59
Upper-case one-letter-code can be used to access the properties of a single amino acid.
63
class OPENMS_DLLAPI AAIndex
67
/// Returns whether the residue is aliphatic (1.0 or 0.0)
68
static DoubleReal aliphatic(char aa)
70
if( aa == 'A' || aa == 'G' || aa == 'F' || aa == 'I' || aa == 'M' || aa == 'L' || aa == 'P' || aa == 'V' )
80
/// Returns whether the residue is acidic (1.0 or 0.0)
81
static DoubleReal acidic(char aa)
83
if( aa == 'D' || aa == 'E' )
93
/// Returns whether the residue is basic (1.0 or 0.0)
94
static DoubleReal basic(char aa)
96
if( aa == 'K' || aa == 'R' || aa == 'H' || aa == 'W' )
106
/// Returns whether the residue is polar (1.0 or 0.0)
107
static DoubleReal polar(char aa)
109
if( aa == 'S' || aa == 'T' || aa == 'Y' || aa == 'H' || aa == 'C' || aa == 'N' || aa == 'Q' || aa == 'W' )
119
//I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
120
//49.1 133. -3.6 0. 0. 20. 0. 64.6 75.7 18.9
121
//15.6 0. 6.8 54.7 43.8 44.4 31.0 70.5 0. 29.5
123
@brief The Kerr-constant increments (Khanarian-Moore, 1980)
126
Khanarian, G. and Moore, W.J.<br>
127
The Kerr effect of amino acids in water<br>
128
Aust. J. Chem. 33, 1727-1741 (1980) (Cys Lys Tyr !)
130
@exception InvalidValue is thrown if an undefined one-letter-code is used
132
static DoubleReal getKHAG800101(char aa)
197
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
201
//I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
202
//0.159 0.194 0.385 0.283 0.187 0.236 0.206 0.049 0.233 0.581
203
//0.083 0.159 0.198 0.682 0.366 0.150 0.074 0.463 0.737 0.301
206
@brief Relative population of conformational state E (Vasquez et al., 1983)
209
Vasquez, M., Nemethy, G. and Scheraga, H.A.<br>
210
Computed conformational states of the 20 naturally occurring amino acid
211
residues and of the prototype residue alpha-aminobutyric acid<br>
212
Macromolecules 16, 1043-1049 (1983) (Pro !)
214
@exception InvalidValue is thrown if an undefined one-letter-code is used
216
static DoubleReal getVASM830103(char aa)
281
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
286
//NADH010105 0.958 NADH010104 0.914 NADH010103 0.881<br>
287
//ZHOH040103 0.819 NADH010107 0.811 BAEK050101 0.809<br>
288
//NADH010102 0.808 PONP800103 0.803 VINM940103 -0.813<br>
289
//KRIW710101 -0.846 KRIW790101 -0.861
290
//I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
291
//5 -57 -77 45 224 -67 -8 -47 -50 83
292
//82 -38 83 117 -103 -41 79 130 27 117
295
@brief Hydropathy scale based on self-information values in the two-state model (36% accessibility) (Naderi-Manesh et al., 2001)
298
Naderi-Manesh, H., Sadeghi, M., Arab, S. and Moosavi Movahedi, A.A.<br>
299
Prediction of protein surface accessibility with information theory<br>
300
Proteins. 42, 452-459 (2001)
302
@exception InvalidValue is thrown if an undefined one-letter-code is used
304
static DoubleReal getNADH010106(char aa)
369
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
374
//I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
375
//-2 -41 -97 248 329 -37 117 -66 -70 28
376
//36 115 62 120 -132 -52 174 179 -7 114
379
@brief Hydropathy scale based on self-information values in the two-state model (50% accessibility) (Naderi-Manesh et al., 2001)
382
Naderi-Manesh, H., Sadeghi, M., Arab, S. and Moosavi Movahedi, A.A.<br>
383
Prediction of protein surface accessibility with information theory<br>
384
Proteins. 42, 452-459 (2001)
386
@exception InvalidValue is thrown if an undefined one-letter-code is used
388
static DoubleReal getNADH010107(char aa)
453
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
457
//WILM950101 0.838 MEEJ810102 0.809
458
//I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
459
//2.62 1.26 -1.27 -2.84 0.73 -1.69 -0.45 -1.15 -0.74 4.38
460
//6.57 -2.78 -3.12 9.14 -0.12 -1.39 1.81 5.91 1.39 2.30
463
@brief Hydrophobicity coefficient in RP-HPLC, C8 with 0.1%TFA/MeCN/H2O (Wilce et al. 1995)
465
Wilce, M.C., Aguilar, M.I. and Hearn, M.T.<br>
466
Physicochemical basis of amino acid hydrophobicity scales: evaluation of four
467
new scales of amino acid hydrophobicity coefficients derived from RP-HPLC of
469
Anal Chem. 67, 1210-1219 (1995)
471
@exception InvalidValue is thrown if an undefined one-letter-code is used
473
static DoubleReal getWILM950102(char aa)
538
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
543
//I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
544
//0.0 1.1 -2.0 -2.6 5.4 2.4 3.1 -3.4 0.8 -0.1
545
//-3.7 -3.1 -2.1 0.7 7.4 1.3 0.0 -3.4 4.8 2.7
548
@brief Information measure for extended without H-bond (Robson-Suzuki, 1976)
551
Robson, B. and Suzuki, E.<br>
552
Conformational properties of amino acid residues in globular proteins<br>
553
J. Mol. Biol. 107, 327-356 (1976)
555
@exception InvalidValue is thrown if an undefined one-letter-code is used
557
static DoubleReal getROBB760107(char aa)
622
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
627
//I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
628
//-2.49 2.55 2.27 8.86 -3.13 1.79 4.04 -0.56 4.22 -10.87
629
//-7.16 -9.97 -4.96 -6.64 5.19 -1.60 -4.75 -17.84 9.25 -3.97
632
@brief Optimized average non-bonded energy per atom (Oobatake et al., 1985)
635
Oobatake, M., Kubota, Y. and Ooi, T.<br>
636
Optimization of amino acid parameters for correspondence of sequence to
637
tertiary structures of proteins<br>
638
Bull. Inst. Chem. Res., Kyoto Univ. 63, 82-94 (1985)
640
@exception InvalidValue is thrown if an undefined one-letter-code is used
642
static DoubleReal getOOBM850104(char aa)
707
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
712
//I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
713
//0. 1. 0. 0. 0. 0. 0. 0. 1. 0.
714
//0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
717
@brief Positive charge (Fauchere et al., 1988)
719
LIT:1414114 PMID:3209351<br>
720
Fauchere, J.L., Charton, M., Kier, L.B., Verloop, A. and Pliska, V.<br>
721
Amino acid side chain parameters for correlation studies in biology and
723
Int. J. Peptide Protein Res. 32, 269-278 (1988)
725
@exception InvalidValue is thrown if an undefined one-letter-code is used
727
static DoubleReal getFAUJ880111(char aa)
792
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
796
//SUEM840101 0.883 AURR980114 0.875 AURR980113 0.849<br>
797
//PTIO830101 0.826 KANM800103 0.823 QIAN880107 0.814<br>
798
//QIAN880106 0.810 MAXF760101 0.810 AURR980109 0.802
799
//I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
800
//1.08 1.05 0.85 0.85 0.95 0.95 1.15 0.55 1.00 1.05
801
//1.25 1.15 1.15 1.10 0.71 0.75 0.75 1.10 1.10 0.95
804
@brief Helix-coil equilibrium constant (Finkelstein-Ptitsyn, 1977)
806
LIT:2004052b PMID:843599<br>
807
Finkelstein, A.V. and Ptitsyn, O.B.<br>
808
Theory of protein molecule self-organization. II. A comparison of calculated
809
thermodynamic parameters of local secondary structures with experiments<br>
810
Biopolymers 16, 497-524 (1977) (Pro 0.096)
812
@exception InvalidValue is thrown if an undefined one-letter-code is used
814
static DoubleReal getFINA770101(char aa)
879
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
883
//ARGP820103 0.961 KYTJ820101 0.803 JURD980101 0.802
884
//I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
885
//1.18 0.20 0.23 0.05 1.89 0.72 0.11 0.49 0.31 1.45
886
//3.23 0.06 2.67 1.96 0.76 0.97 0.84 0.77 0.39 1.08
889
@brief Signal sequence helical potential (Argos et al., 1982)
891
LIT:0901079b PMID:7151796<br>
892
Argos, P., Rao, J.K.M. and Hargrave, P.A.<br>
893
Structural prediction of membrane-bound proteins<br>
894
Eur. J. Biochem. 128, 565-575 (1982)
896
@exception InvalidValue is thrown if an undefined one-letter-code is used
898
static DoubleReal getARGP820102(char aa)
963
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
968
@brief Calculates an estimated gas-phase basicity for an amino acid sequence at a given temperature
970
The energy level E at each protonation site i is -GB(i); the fractional proton population of a microstate k is <br>
971
P_k = exp (- E_k/(RT)) / ( sum_i exp (- E_i/(RT))) <br>
972
The apparent proton association constant K_app: K_app = sum_i exp(GB(i)/(RT))<br>
973
Then the apparent GB is GB_app^ion = R * T * ln(K_app)
975
@exception InvalidValue is thrown if an undefined one-letter-code is used
977
static DoubleReal calculateGB(const AASequence& seq, DoubleReal T=500.0)
980
DoubleReal R = Constants::GAS_CONSTANT/1000.0; // ideal gas constant in kJ/(K*mol)
985
DoubleReal k_app = 0.0; // apparent proton association constant
987
// energy level E at each protonation site i is -GB(i)
988
// fractional proton population of a microstate k is
989
// P_k = exp (- E_k/(RT)) / ( sum_i exp (- E_i/(RT)))
990
// the apparent proton association constant k_app:
991
// k_app = sum_i exp(GB(i)/(RT))
992
// then the apparent GB is GB_app^ion = R * T * ln(k_app)
993
for (Size i = 0; i <= seq.size(); i++)
995
// aa left to current one
998
Residue leftchar = seq[i-1];
999
left = leftchar.getOneLetterCode()[0];
1002
// aa right to current one
1009
Residue rightchar = seq[i];
1010
right = rightchar.getOneLetterCode()[0];
1012
DoubleReal contrib = exp((GBleft_(left) + GBdeltaright_(right))/(R*T));
1013
if(i > 0 && i < seq.size())
1015
contrib += exp(GBsidechain_(right)/(R*T));
1019
// calculate apparent GB
1020
return R * T * log(k_app)/log(2.0);
1026
@brief Calculates part of the gas-phase basicity
1028
For a detailed description see @ref calculateGB(const AASequence&, DoubleReal) .
1030
@exception InvalidValue is thrown if an undefined one-letter-code is used
1032
static DoubleReal GBsidechain_(char aa)
1097
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
1100
return 0.0; //this should never be reached, but is necessary to suppress compiler warnings on older compilers
1105
@brief Calculates part of the gas-phase basicity
1107
For a detailed description see @ref calculateGB(const AASequence&, DoubleReal) .
1109
@exception InvalidValue is thrown if an undefined one-letter-code is used
1111
static DoubleReal GBleft_(char aa)
1179
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
1183
return 0.0; //this should never be reached, but is necessary to suppress compiler warnings on older compilers
1187
@brief Calculates part of the gas-phase basicity
1189
For a detailed description see @ref calculateGB(const AASequence&, DoubleReal) .
1191
@exception InvalidValue is thrown if an undefined one-letter-code is used
1193
static DoubleReal GBdeltaright_(char aa)
1261
throw Exception::InvalidValue(__FILE__,__LINE__,__PRETTY_FUNCTION__, "Unkown amino acid one-letter-code",String(aa));
1264
return 0.0; //this should never be reached, but is necessary to suppress compiler warnings on older compilers
1269
///Constructor not implemented => private