1
// -*- mode: C++; tab-width: 2; -*-
4
// --------------------------------------------------------------------------
5
// OpenMS Mass Spectrometry Framework
6
// --------------------------------------------------------------------------
7
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
9
// This library is free software; you can redistribute it and/or
10
// modify it under the terms of the GNU Lesser General Public
11
// License as published by the Free Software Foundation; either
12
// version 2.1 of the License, or (at your option) any later version.
14
// This library is distributed in the hope that it will be useful,
15
// but WITHOUT ANY WARRANTY; without even the implied warranty of
16
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
// Lesser General Public License for more details.
19
// You should have received a copy of the GNU Lesser General Public
20
// License along with this library; if not, write to the Free Software
21
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1
// --------------------------------------------------------------------------
2
// OpenMS -- Open-Source Mass Spectrometry
3
// --------------------------------------------------------------------------
4
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5
// ETH Zurich, and Freie Universitaet Berlin 2002-2013.
7
// This software is released under a three-clause BSD license:
8
// * Redistributions of source code must retain the above copyright
9
// notice, this list of conditions and the following disclaimer.
10
// * Redistributions in binary form must reproduce the above copyright
11
// notice, this list of conditions and the following disclaimer in the
12
// documentation and/or other materials provided with the distribution.
13
// * Neither the name of any author or any participating institution
14
// may be used to endorse or promote products derived from this software
15
// without specific prior written permission.
16
// For a full list of authors, refer to the file AUTHORS.
17
// --------------------------------------------------------------------------
18
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
30
// --------------------------------------------------------------------------
24
31
// $Maintainer: Alexandra Zerck $
36
InternalCalibration::InternalCalibration()
37
:DefaultParamHandler("InternalCalibration"),
39
trafo_(TransformationDescription::DataPoints())
41
defaults_.setValue("mz_tolerance",1.,"Allowed tolerance between peak and reference m/z.");
42
defaults_.setMinFloat("mz_tolerance",0.);
43
defaults_.setValue("mz_tolerance_unit","Da","Unit for mz_tolerance.");
44
defaults_.setValidStrings("mz_tolerance_unit",StringList::create("Da,ppm"));
45
defaults_.setValue("rt_tolerance",10,"Allowed tolerance between peak and reference rt.");
46
// defaults_.setValue("hires:percentage",30,"Percentage of spectra a signal has to appear in to be considered as background signal.");
50
void InternalCalibration::checkReferenceIds_(std::vector<PeptideIdentification>& pep_ids)
52
for(Size p_id = 0; p_id < pep_ids.size();++p_id)
54
if(pep_ids[p_id].getHits().size() > 1)
56
throw Exception::InvalidParameter(__FILE__,__LINE__,__PRETTY_FUNCTION__, "InternalCalibration: Your Id-file contains PeptideIdentifications with more than one hit, use the IDFilter to select only the best hits.");
58
if(!pep_ids[p_id].metaValueExists("RT"))
60
throw Exception::MissingInformation(__FILE__,__LINE__,__PRETTY_FUNCTION__, "InternalCalibration: meta data value 'RT' missing for peptide identification!");
62
if(!pep_ids[p_id].metaValueExists("MZ"))
64
throw Exception::MissingInformation(__FILE__,__LINE__,__PRETTY_FUNCTION__, "InternalCalibration: meta data value 'MZ' missing for peptide identification!");
70
void InternalCalibration::makeLinearRegression_(std::vector<DoubleReal>& observed_masses, std::vector<DoubleReal>& theoretical_masses)
72
if(observed_masses.size() != theoretical_masses.size())
74
throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,"Number of observed and theoretical masses must agree.");
76
#ifdef DEBUG_CALIBRATION
77
std::ofstream out("calibration_regression.txt");
78
std::vector<DoubleReal> rel_errors(observed_masses.size(),0.);
79
// determine rel error in ppm for the two reference masses
80
for(Size ref_peak=0; ref_peak < observed_masses.size();++ref_peak)
82
rel_errors[ref_peak] = (theoretical_masses[ref_peak]-observed_masses[ref_peak])/theoretical_masses[ref_peak] * 1e6;
84
out << observed_masses[ref_peak] << "\t"<< rel_errors[ref_peak] << "\n";
85
std::cout << observed_masses[ref_peak] <<" "<<theoretical_masses[ref_peak]<<std::endl;
86
// std::cout << observed_masses[ref_peak]<<"\t"<<rel_errors[ref_peak]<<std::endl;
90
TransformationDescription::DataPoints data;
91
for (Size i = 0; i < observed_masses.size(); ++i)
93
data.push_back(std::make_pair(observed_masses[i],
94
theoretical_masses[i]));
97
trafo_ = TransformationDescription(data);
98
trafo_.fitModel("linear", Param());
100
#ifdef DEBUG_CALIBRATION
101
// std::cout <<"\n\n---------------------------------\n\n"<< "after calibration "<<std::endl;
102
for(Size i = 0; i < observed_masses.size();++i)
104
DoubleReal new_mass = trafo_.apply(observed_masses[i]);
106
DoubleReal rel_error = (theoretical_masses[i]-(new_mass))/theoretical_masses[i] * 1e6;
107
std::cout << observed_masses[i]<<"\t"<<rel_error<<std::endl;
114
void InternalCalibration::calibrateMapGlobally(const FeatureMap<>& feature_map, FeatureMap<>& calibrated_feature_map,
115
String trafo_file_name)
118
checkReferenceIds_(feature_map);
119
// first collect theoretical and observed m/z values
120
std::vector<DoubleReal> observed_masses;
121
std::vector<DoubleReal> theoretical_masses;
122
for(Size f = 0; f < feature_map.size();++f)
124
// if more than one peptide id exists for this feature we can't use it as reference
125
if(feature_map[f].getPeptideIdentifications().size() > 1) continue;
126
if(!feature_map[f].getPeptideIdentifications().empty())
128
Int charge = feature_map[f].getPeptideIdentifications()[0].getHits()[0].getCharge();
129
DoubleReal theo_mass = feature_map[f].getPeptideIdentifications()[0].getHits()[0].getSequence().getMonoWeight(Residue::Full,charge)/(DoubleReal)charge;
130
theoretical_masses.push_back(theo_mass);
131
observed_masses.push_back(feature_map[f].getMZ());
132
#ifdef DEBUG_CALIBRATION
133
std::cout << feature_map[f].getRT() <<" " <<feature_map[f].getMZ() <<" "<<theo_mass<<std::endl;
134
std::cout << feature_map[f].getPeptideIdentifications()[0].getHits().size()<<std::endl;
135
std::cout << feature_map[f].getPeptideIdentifications()[0].getHits()[0].getSequence()<<std::endl;
136
std::cout << feature_map[f].getPeptideIdentifications()[0].getHits()[0].getCharge()<<std::endl;
140
// then make the linear regression
141
makeLinearRegression_(observed_masses,theoretical_masses);
142
// apply transformation
143
applyTransformation_(feature_map,calibrated_feature_map);
144
if(trafo_file_name != "")
146
TransformationXMLFile().store(trafo_file_name,trafo_);
151
void InternalCalibration::calibrateMapGlobally(const FeatureMap<>& feature_map, FeatureMap<>& calibrated_feature_map,std::vector<PeptideIdentification>& ref_ids,String trafo_file_name)
43
InternalCalibration::InternalCalibration() :
44
DefaultParamHandler("InternalCalibration"),
46
trafo_(TransformationDescription::DataPoints())
48
defaults_.setValue("mz_tolerance", 1., "Allowed tolerance between peak and reference m/z.");
49
defaults_.setMinFloat("mz_tolerance", 0.);
50
defaults_.setValue("mz_tolerance_unit", "Da", "Unit for mz_tolerance.");
51
defaults_.setValidStrings("mz_tolerance_unit", StringList::create("Da,ppm"));
52
defaults_.setValue("rt_tolerance", 10, "Allowed tolerance between peak and reference rt.");
53
// defaults_.setValue("hires:percentage",30,"Percentage of spectra a signal has to appear in to be considered as background signal.");
57
void InternalCalibration::checkReferenceIds_(std::vector<PeptideIdentification> & pep_ids)
59
for (Size p_id = 0; p_id < pep_ids.size(); ++p_id)
61
if (pep_ids[p_id].getHits().size() > 1)
63
throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "InternalCalibration: Your Id-file contains PeptideIdentifications with more than one hit, use the IDFilter to select only the best hits.");
65
if (!pep_ids[p_id].metaValueExists("RT"))
67
throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "InternalCalibration: meta data value 'RT' missing for peptide identification!");
69
if (!pep_ids[p_id].metaValueExists("MZ"))
71
throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "InternalCalibration: meta data value 'MZ' missing for peptide identification!");
76
void InternalCalibration::makeLinearRegression_(std::vector<DoubleReal> & observed_masses, std::vector<DoubleReal> & theoretical_masses)
78
if (observed_masses.size() != theoretical_masses.size())
80
throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Number of observed and theoretical masses must agree.");
82
#ifdef DEBUG_CALIBRATION
83
std::ofstream out("calibration_regression.txt");
84
std::vector<DoubleReal> rel_errors(observed_masses.size(), 0.);
85
// determine rel error in ppm for the two reference masses
86
for (Size ref_peak = 0; ref_peak < observed_masses.size(); ++ref_peak)
88
rel_errors[ref_peak] = (theoretical_masses[ref_peak] - observed_masses[ref_peak]) / theoretical_masses[ref_peak] * 1e6;
90
out << observed_masses[ref_peak] << "\t" << rel_errors[ref_peak] << "\n";
91
std::cout << observed_masses[ref_peak] << " " << theoretical_masses[ref_peak] << std::endl;
92
// std::cout << observed_masses[ref_peak]<<"\t"<<rel_errors[ref_peak]<<std::endl;
96
TransformationDescription::DataPoints data;
97
for (Size i = 0; i < observed_masses.size(); ++i)
99
data.push_back(std::make_pair(observed_masses[i],
100
theoretical_masses[i]));
103
trafo_ = TransformationDescription(data);
104
trafo_.fitModel("linear", Param());
106
#ifdef DEBUG_CALIBRATION
107
// std::cout <<"\n\n---------------------------------\n\n"<< "after calibration "<<std::endl;
108
for (Size i = 0; i < observed_masses.size(); ++i)
110
DoubleReal new_mass = trafo_.apply(observed_masses[i]);
112
DoubleReal rel_error = (theoretical_masses[i] - (new_mass)) / theoretical_masses[i] * 1e6;
113
std::cout << observed_masses[i] << "\t" << rel_error << std::endl;
118
void InternalCalibration::calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map,
119
String trafo_file_name)
122
checkReferenceIds_(feature_map);
123
// first collect theoretical and observed m/z values
124
std::vector<DoubleReal> observed_masses;
125
std::vector<DoubleReal> theoretical_masses;
126
for (Size f = 0; f < feature_map.size(); ++f)
128
// if more than one peptide id exists for this feature we can't use it as reference
129
if (feature_map[f].getPeptideIdentifications().size() > 1)
131
if (!feature_map[f].getPeptideIdentifications().empty())
133
Int charge = feature_map[f].getPeptideIdentifications()[0].getHits()[0].getCharge();
134
DoubleReal theo_mass = feature_map[f].getPeptideIdentifications()[0].getHits()[0].getSequence().getMonoWeight(Residue::Full, charge) / (DoubleReal)charge;
135
theoretical_masses.push_back(theo_mass);
136
observed_masses.push_back(feature_map[f].getMZ());
137
#ifdef DEBUG_CALIBRATION
138
std::cout << feature_map[f].getRT() << " " << feature_map[f].getMZ() << " " << theo_mass << std::endl;
139
std::cout << feature_map[f].getPeptideIdentifications()[0].getHits().size() << std::endl;
140
std::cout << feature_map[f].getPeptideIdentifications()[0].getHits()[0].getSequence() << std::endl;
141
std::cout << feature_map[f].getPeptideIdentifications()[0].getHits()[0].getCharge() << std::endl;
145
// then make the linear regression
146
makeLinearRegression_(observed_masses, theoretical_masses);
147
// apply transformation
148
applyTransformation_(feature_map, calibrated_feature_map);
149
if (trafo_file_name != "")
151
TransformationXMLFile().store(trafo_file_name, trafo_);
155
void InternalCalibration::calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name)
153
157
checkReferenceIds_(ref_ids);
155
159
calibrated_feature_map = feature_map;
157
for(Size f = 0;f < calibrated_feature_map.size();++f)
161
for (Size f = 0; f < calibrated_feature_map.size(); ++f)
159
calibrated_feature_map[f].getPeptideIdentifications().clear();
163
calibrated_feature_map[f].getPeptideIdentifications().clear();
162
166
// map the reference ids onto the features
165
param.setValue("rt_tolerance",(DoubleReal)param_.getValue("rt_tolerance"));
166
param.setValue("mz_tolerance",param_.getValue("mz_tolerance"));
167
param.setValue("mz_measure",param_.getValue("mz_tolerance_unit"));
169
param.setValue("rt_tolerance", (DoubleReal)param_.getValue("rt_tolerance"));
170
param.setValue("mz_tolerance", param_.getValue("mz_tolerance"));
171
param.setValue("mz_measure", param_.getValue("mz_tolerance_unit"));
168
172
mapper.setParameters(param);
169
173
std::vector<ProteinIdentification> vec;
170
mapper.annotate(calibrated_feature_map,ref_ids,vec);
174
mapper.annotate(calibrated_feature_map, ref_ids, vec);
173
calibrateMapGlobally(calibrated_feature_map,calibrated_feature_map,trafo_file_name);
177
calibrateMapGlobally(calibrated_feature_map, calibrated_feature_map, trafo_file_name);
176
calibrated_feature_map.setUnassignedPeptideIdentifications(feature_map.getUnassignedPeptideIdentifications());
177
for(Size f = 0;f < feature_map.size();++f)
180
calibrated_feature_map.setUnassignedPeptideIdentifications(feature_map.getUnassignedPeptideIdentifications());
181
for (Size f = 0; f < feature_map.size(); ++f)
179
calibrated_feature_map[f].getPeptideIdentifications().clear();
180
if(!feature_map[f].getPeptideIdentifications().empty())
182
calibrated_feature_map[f].setPeptideIdentifications(feature_map[f].getPeptideIdentifications());
183
calibrated_feature_map[f].getPeptideIdentifications().clear();
184
if (!feature_map[f].getPeptideIdentifications().empty())
186
calibrated_feature_map[f].setPeptideIdentifications(feature_map[f].getPeptideIdentifications());
187
void InternalCalibration::applyTransformation_(const FeatureMap<>& feature_map,FeatureMap<>& calibrated_feature_map)
191
void InternalCalibration::applyTransformation_(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map)
189
193
calibrated_feature_map = feature_map;
190
for(Size f = 0; f < feature_map.size();++f)
192
DoubleReal mz = feature_map[f].getMZ();
193
mz = trafo_.apply(mz);
194
calibrated_feature_map[f].setMZ(mz);
196
// apply transformation to convex hulls and subordinates
197
for(Size s = 0; s < calibrated_feature_map[f].getSubordinates().size();++s)
200
DoubleReal mz = calibrated_feature_map[f].getSubordinates()[s].getMZ();
201
mz = trafo_.apply(mz);
202
calibrated_feature_map[f].getSubordinates()[s].setMZ(mz);
204
for(Size s = 0; s < calibrated_feature_map[f].getConvexHulls().size();++s)
207
std::vector<DPosition<2> > point_vec = calibrated_feature_map[f].getConvexHulls()[s].getHullPoints();
208
calibrated_feature_map[f].getConvexHulls()[s].clear();
209
for(Size p = 0; p < point_vec.size(); ++p)
211
DoubleReal mz = point_vec[p][1];
212
mz = trafo_.apply(mz);
213
point_vec[p][1] = mz;
215
calibrated_feature_map[f].getConvexHulls()[s].setHullPoints(point_vec);
220
void InternalCalibration::checkReferenceIds_(const FeatureMap<>& feature_map)
223
for(Size f = 0; f < feature_map.size();++f)
225
if(!feature_map[f].getPeptideIdentifications().empty() && feature_map[f].getPeptideIdentifications()[0].getHits().size() > 1)
227
throw Exception::InvalidParameter(__FILE__,__LINE__,__PRETTY_FUNCTION__, "InternalCalibration: Your feature map contains PeptideIdentifications with more than one hit, use the IDFilter to select only the best hits before you map the ids to the feature map.");
229
else if(!feature_map[f].getPeptideIdentifications().empty()) ++num_ids;
233
throw Exception::InvalidParameter(__FILE__,__LINE__,__PRETTY_FUNCTION__, "InternalCalibration: Your feature map contains less than two PeptideIdentifications, can't perform a linear regression on your data.");
194
for (Size f = 0; f < feature_map.size(); ++f)
196
DoubleReal mz = feature_map[f].getMZ();
197
mz = trafo_.apply(mz);
198
calibrated_feature_map[f].setMZ(mz);
200
// apply transformation to convex hulls and subordinates
201
for (Size s = 0; s < calibrated_feature_map[f].getSubordinates().size(); ++s)
204
DoubleReal mz = calibrated_feature_map[f].getSubordinates()[s].getMZ();
205
mz = trafo_.apply(mz);
206
calibrated_feature_map[f].getSubordinates()[s].setMZ(mz);
208
for (Size s = 0; s < calibrated_feature_map[f].getConvexHulls().size(); ++s)
211
std::vector<DPosition<2> > point_vec = calibrated_feature_map[f].getConvexHulls()[s].getHullPoints();
212
calibrated_feature_map[f].getConvexHulls()[s].clear();
213
for (Size p = 0; p < point_vec.size(); ++p)
215
DoubleReal mz = point_vec[p][1];
216
mz = trafo_.apply(mz);
217
point_vec[p][1] = mz;
219
calibrated_feature_map[f].getConvexHulls()[s].setHullPoints(point_vec);
224
void InternalCalibration::checkReferenceIds_(const FeatureMap<> & feature_map)
227
for (Size f = 0; f < feature_map.size(); ++f)
229
if (!feature_map[f].getPeptideIdentifications().empty() && feature_map[f].getPeptideIdentifications()[0].getHits().size() > 1)
231
throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "InternalCalibration: Your feature map contains PeptideIdentifications with more than one hit, use the IDFilter to select only the best hits before you map the ids to the feature map.");
233
else if (!feature_map[f].getPeptideIdentifications().empty())
238
throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "InternalCalibration: Your feature map contains less than two PeptideIdentifications, can't perform a linear regression on your data.");