1
// -*- mode: C++; tab-width: 2; -*-
4
// --------------------------------------------------------------------------
5
// OpenMS Mass Spectrometry Framework
6
// --------------------------------------------------------------------------
7
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
9
// This library is free software; you can redistribute it and/or
10
// modify it under the terms of the GNU Lesser General Public
11
// License as published by the Free Software Foundation; either
12
// version 2.1 of the License, or (at your option) any later version.
14
// This library is distributed in the hope that it will be useful,
15
// but WITHOUT ANY WARRANTY; without even the implied warranty of
16
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
// Lesser General Public License for more details.
19
// You should have received a copy of the GNU Lesser General Public
20
// License along with this library; if not, write to the Free Software
21
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
// --------------------------------------------------------------------------
24
// $Maintainer: Chris Bielow $
25
// $Authors: Marc Sturm, Chris Bielow $
26
// --------------------------------------------------------------------------
28
#include <OpenMS/ANALYSIS/ID/IDMapper.h>
36
: DefaultParamHandler("IDMapper"),
39
measure_(MEASURE_PPM),
42
defaults_.setValue("rt_tolerance", rt_tolerance_, "RT tolerance (in seconds) for the matching");
43
defaults_.setMinFloat("rt_tolerance", 0);
44
defaults_.setValue("mz_tolerance", mz_tolerance_, "m/z tolerance (in ppm or Da) for the matching");
45
defaults_.setMinFloat("mz_tolerance", 0);
46
defaults_.setValue("mz_measure", "ppm", "unit of 'mz_tolerance' (ppm or Da)");
47
defaults_.setValidStrings("mz_measure", StringList::create("ppm,Da"));
48
defaults_.setValue("mz_reference", "precursor", "source of m/z values for peptide identifications");
49
defaults_.setValidStrings("mz_reference", StringList::create("precursor,peptide"));
51
defaults_.setValue("ignore_charge", "false", "For feature/consensus maps: Assign an ID independently of whether its charge state matches that of the (consensus) feature.");
52
defaults_.setValidStrings("ignore_charge", StringList::create("true,false"));
58
IDMapper::IDMapper(const IDMapper& cp)
59
: DefaultParamHandler(cp),
60
rt_tolerance_(cp.rt_tolerance_),
61
mz_tolerance_(cp.mz_tolerance_),
62
measure_(cp.measure_),
63
ignore_charge_(cp.ignore_charge_)
68
IDMapper& IDMapper::operator= (const IDMapper& rhs)
70
if (this == &rhs) return *this;
72
DefaultParamHandler::operator= (rhs);
73
rt_tolerance_=rhs.rt_tolerance_;
74
mz_tolerance_=rhs.mz_tolerance_;
75
measure_=rhs.measure_;
76
ignore_charge_ = rhs.ignore_charge_;
82
void IDMapper::updateMembers_()
84
rt_tolerance_ = param_.getValue("rt_tolerance");
85
mz_tolerance_ = param_.getValue("mz_tolerance");
86
measure_ = param_.getValue("mz_measure")=="ppm"? MEASURE_PPM : MEASURE_DA;
87
ignore_charge_ = param_.getValue("ignore_charge") == "true";
90
void IDMapper::annotate(ConsensusMap& map, const std::vector<PeptideIdentification>& ids, const std::vector<ProteinIdentification>& protein_ids, bool measure_from_subelements)
92
// validate "RT" and "MZ" metavalues exist
95
//append protein identifications to Map
96
map.getProteinIdentifications().insert(map.getProteinIdentifications().end(),protein_ids.begin(),protein_ids.end());
98
//keep track of assigned/unassigned peptide identifications
99
std::map<Size,Size> assigned;
101
// store which peptides fit which feature (and avoid double entries)
102
// consensusMap -> {peptide_index}
103
std::vector < std::set< size_t> > mapping(map.size());
105
DoubleList mz_values;
109
//iterate over the peptide IDs
110
for (Size i=0; i<ids.size(); ++i)
112
if (ids[i].getHits().empty()) continue;
114
getIDDetails_(ids[i], rt_pep, mz_values, charges);
116
//iterate over the features
117
for(Size cm_index = 0 ; cm_index<map.size(); ++cm_index)
119
// if set to TRUE, we leave the i_mz-loop as we added the whole ID with all hits
120
bool was_added=false; // was current pep-m/z matched?!
122
// iterate over m/z values of pepIds
123
for (Size i_mz = 0; i_mz < mz_values.size(); ++i_mz)
125
DoubleReal mz_pep = mz_values[i_mz];
127
// charge states to use for checking:
128
IntList current_charges;
131
// if "mz_ref." is "precursor", we have only one m/z value to check,
132
// but still one charge state per peptide hit that could match:
133
if (mz_values.size() == 1)
135
current_charges = charges;
137
else current_charges << charges[i_mz];
138
current_charges << 0; // "not specified" always matches
141
//check if we compare distance from centroid or subelements
142
if (!measure_from_subelements)
144
if (isMatch_(rt_pep - map[cm_index].getRT(), mz_pep, map[cm_index].getMZ()) && (ignore_charge_ || current_charges.contains(map[cm_index].getCharge())))
147
map[cm_index].getPeptideIdentifications().push_back(ids[i]);
153
for(ConsensusFeature::HandleSetType::const_iterator it_handle = map[cm_index].getFeatures().begin();
154
it_handle != map[cm_index].getFeatures().end();
157
if (isMatch_(rt_pep - it_handle->getRT(), mz_pep, it_handle->getMZ()) && (ignore_charge_ || current_charges.contains(it_handle->getCharge())))
160
if (mapping[cm_index].count(i) == 0)
162
map[cm_index].getPeptideIdentifications().push_back(ids[i]);
164
mapping[cm_index].insert(i);
166
break; // we added this peptide already.. no need to check other handles
172
if (was_added) break;
174
} // m/z values to check
182
Size matches_none(0);
183
Size matches_single(0);
184
Size matches_multi(0);
186
//append unassigned peptide identifications
187
for (Size i=0; i<ids.size(); ++i)
191
map.getUnassignedPeptideIdentifications().push_back(ids[i]);
194
else if (assigned[i]==1)
198
else if (assigned[i]>1)
204
//some statistics output
205
LOG_INFO << "Unassigned peptides: " << matches_none << "\n"
206
<< "Peptides assigned to exactly one feature: "
207
<< matches_single << "\n"
208
<< "Peptides assigned to multiple features: "
209
<< matches_multi << std::endl;
213
DoubleReal IDMapper::getAbsoluteMZTolerance_(const DoubleReal mz) const
215
if (measure_==MEASURE_PPM)
217
return (mz * mz_tolerance_ / 1e6);
219
else if (measure_==MEASURE_DA)
221
return mz_tolerance_;
223
throw Exception::InvalidValue(__FILE__, __LINE__, __PRETTY_FUNCTION__, "IDMapper::getAbsoluteTolerance_(): illegal internal state of measure_!", String(measure_));
226
bool IDMapper::isMatch_(const DoubleReal rt_distance, const DoubleReal mz_theoretical, const DoubleReal mz_observed) const
228
if (measure_==MEASURE_PPM)
230
return (fabs(rt_distance) <= rt_tolerance_) && (fabs((mz_theoretical*1e6-mz_observed*1e6)/mz_theoretical) <= mz_tolerance_);
232
else if (measure_==MEASURE_DA)
234
return (fabs(rt_distance) <= rt_tolerance_) && (fabs(mz_theoretical-mz_observed) <= mz_tolerance_);
236
throw Exception::InvalidValue(__FILE__, __LINE__, __PRETTY_FUNCTION__, "IDMapper::getAbsoluteTolerance_(): illegal internal state of measure_!", String(measure_));
239
void IDMapper::checkHits_(const std::vector<PeptideIdentification>& ids) const
241
for (Size i=0; i<ids.size(); ++i)
243
if (!ids[i].metaValueExists("RT"))
245
throw Exception::MissingInformation(__FILE__,__LINE__,__PRETTY_FUNCTION__, "IDMapper: meta data value 'RT' missing for peptide identification!");
247
if (!ids[i].metaValueExists("MZ"))
249
throw Exception::MissingInformation(__FILE__,__LINE__,__PRETTY_FUNCTION__, "IDMapper: meta data value 'MZ' missing for peptide identification!");
254
void IDMapper::getIDDetails_(const PeptideIdentification& id, DoubleReal& rt_pep, DoubleList& mz_values, IntList& charges, bool use_avg_mass) const
259
rt_pep = id.getMetaValue("RT");
261
// collect m/z values of pepId
262
if (param_.getValue("mz_reference") == "precursor")
263
{ // use precursor m/z of pepId
264
mz_values << id.getMetaValue("MZ");
267
for (vector<PeptideHit>::const_iterator hit_it = id.getHits().begin();
268
hit_it != id.getHits().end(); ++hit_it)
270
Int charge = hit_it->getCharge();
273
if (param_.getValue("mz_reference") == "peptide")
274
{ // use mass of each pepHit (assuming H+ adducts)
275
DoubleReal mass = use_avg_mass ?
276
hit_it->getSequence().getAverageWeight(Residue::Full, charge) :
277
hit_it->getSequence().getMonoWeight(Residue::Full, charge);
279
mz_values << mass / (DoubleReal) charge;
285
void IDMapper::increaseBoundingBox_(DBoundingBox<2>& box)
287
DPosition<2> sub_min(rt_tolerance_,
288
getAbsoluteMZTolerance_(box.minPosition().getY())),
289
add_max(rt_tolerance_, getAbsoluteMZTolerance_(box.maxPosition().getY()));
291
box.setMin(box.minPosition() - sub_min);
292
box.setMax(box.maxPosition() + add_max);
296
bool IDMapper::checkMassType_(const vector<DataProcessing>& processing) const
298
bool use_avg_mass = false;
300
for (vector<DataProcessing>::const_iterator proc_it = processing.begin();
301
proc_it != processing.end(); ++proc_it)
303
if (proc_it->getSoftware().getName() == "FeatureFinder")
305
String reported_mz = proc_it->
306
getMetaValue("parameter: algorithm:feature:reported_mz");
307
if (reported_mz.empty()) continue; // parameter info not available
308
if (!before.empty() && (reported_mz != before))
310
LOG_WARN << "The m/z values reported for features in the input seem to be of different types (e.g. monoisotopic/average). They will all be compared against monoisotopic peptide masses, but the mapping results may not be meaningful in the end." << endl;
313
if (reported_mz == "average")
317
else if (reported_mz == "maximum")
319
LOG_WARN << "For features, m/z values from the highest mass traces are reported. This type of m/z value is not available for peptides, so the comparison has to be done using average peptide masses." << endl;
322
before = reported_mz;
329
} // namespace OpenMS