~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to source/ANALYSIS/ID/IDMapper.C

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2012-11-12 15:58:12 UTC
  • Revision ID: package-import@ubuntu.com-20121112155812-vr15wtg9b50cuesg
Tags: upstream-1.9.0
ImportĀ upstreamĀ versionĀ 1.9.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// -*- mode: C++; tab-width: 2; -*-
 
2
// vi: set ts=2:
 
3
//
 
4
// --------------------------------------------------------------------------
 
5
//                   OpenMS Mass Spectrometry Framework
 
6
// --------------------------------------------------------------------------
 
7
//  Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
 
8
//
 
9
//  This library is free software; you can redistribute it and/or
 
10
//  modify it under the terms of the GNU Lesser General Public
 
11
//  License as published by the Free Software Foundation; either
 
12
//  version 2.1 of the License, or (at your option) any later version.
 
13
// 
 
14
//  This library is distributed in the hope that it will be useful,
 
15
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
16
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
17
//  Lesser General Public License for more details.
 
18
//
 
19
//  You should have received a copy of the GNU Lesser General Public
 
20
//  License along with this library; if not, write to the Free Software
 
21
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
22
//
 
23
// --------------------------------------------------------------------------
 
24
// $Maintainer: Chris Bielow $
 
25
// $Authors: Marc Sturm, Chris Bielow $
 
26
// --------------------------------------------------------------------------
 
27
 
 
28
#include <OpenMS/ANALYSIS/ID/IDMapper.h>
 
29
 
 
30
using namespace std;
 
31
 
 
32
namespace OpenMS 
 
33
{
 
34
 
 
35
        IDMapper::IDMapper()
 
36
        : DefaultParamHandler("IDMapper"),
 
37
                rt_tolerance_(5.0),
 
38
                mz_tolerance_(20),
 
39
                measure_(MEASURE_PPM),
 
40
                        ignore_charge_(false)
 
41
  {
 
42
                defaults_.setValue("rt_tolerance", rt_tolerance_, "RT tolerance (in seconds) for the matching"); 
 
43
                defaults_.setMinFloat("rt_tolerance", 0);
 
44
                defaults_.setValue("mz_tolerance", mz_tolerance_, "m/z tolerance (in ppm or Da) for the matching"); 
 
45
                defaults_.setMinFloat("mz_tolerance", 0);
 
46
                defaults_.setValue("mz_measure", "ppm", "unit of 'mz_tolerance' (ppm or Da)"); 
 
47
                defaults_.setValidStrings("mz_measure", StringList::create("ppm,Da"));
 
48
                defaults_.setValue("mz_reference", "precursor", "source of m/z values for peptide identifications"); 
 
49
                defaults_.setValidStrings("mz_reference", StringList::create("precursor,peptide"));
 
50
                
 
51
                defaults_.setValue("ignore_charge", "false", "For feature/consensus maps: Assign an ID independently of whether its charge state matches that of the (consensus) feature.");
 
52
                defaults_.setValidStrings("ignore_charge", StringList::create("true,false"));
 
53
 
 
54
                defaultsToParam_();  
 
55
  }
 
56
 
 
57
                        
 
58
        IDMapper::IDMapper(const IDMapper& cp)
 
59
        : DefaultParamHandler(cp),
 
60
                rt_tolerance_(cp.rt_tolerance_),
 
61
        mz_tolerance_(cp.mz_tolerance_),
 
62
        measure_(cp.measure_),
 
63
                ignore_charge_(cp.ignore_charge_)
 
64
        {
 
65
                updateMembers_();
 
66
        }
 
67
 
 
68
        IDMapper& IDMapper::operator= (const IDMapper& rhs)
 
69
        {
 
70
                if (this == &rhs) return *this;
 
71
                
 
72
                DefaultParamHandler::operator= (rhs);
 
73
                rt_tolerance_=rhs.rt_tolerance_;
 
74
        mz_tolerance_=rhs.mz_tolerance_;
 
75
        measure_=rhs.measure_;
 
76
                ignore_charge_ = rhs.ignore_charge_;
 
77
                updateMembers_();
 
78
                
 
79
                return *this;
 
80
        }
 
81
 
 
82
        void IDMapper::updateMembers_()
 
83
        {
 
84
                rt_tolerance_ = param_.getValue("rt_tolerance");
 
85
                mz_tolerance_ = param_.getValue("mz_tolerance");
 
86
                measure_ = param_.getValue("mz_measure")=="ppm"? MEASURE_PPM : MEASURE_DA;
 
87
                ignore_charge_ = param_.getValue("ignore_charge") == "true";
 
88
        }       
 
89
        
 
90
        void IDMapper::annotate(ConsensusMap& map, const std::vector<PeptideIdentification>& ids, const std::vector<ProteinIdentification>& protein_ids, bool measure_from_subelements)
 
91
        {
 
92
                // validate "RT" and "MZ" metavalues exist
 
93
                checkHits_(ids);
 
94
                                
 
95
                //append protein identifications to Map
 
96
                map.getProteinIdentifications().insert(map.getProteinIdentifications().end(),protein_ids.begin(),protein_ids.end());
 
97
 
 
98
                //keep track of assigned/unassigned peptide identifications
 
99
                std::map<Size,Size> assigned;
 
100
                                        
 
101
                // store which peptides fit which feature (and avoid double entries)
 
102
                // consensusMap -> {peptide_index}
 
103
                std::vector < std::set< size_t> > mapping(map.size());
 
104
 
 
105
                DoubleList mz_values;
 
106
                DoubleReal rt_pep;
 
107
                IntList charges;
 
108
                
 
109
                //iterate over the peptide IDs
 
110
                for (Size i=0; i<ids.size(); ++i)
 
111
                {
 
112
                        if (ids[i].getHits().empty()) continue;
 
113
                        
 
114
                        getIDDetails_(ids[i], rt_pep, mz_values, charges);
 
115
 
 
116
                        //iterate over the features
 
117
                        for(Size cm_index = 0 ; cm_index<map.size(); ++cm_index)
 
118
                        {
 
119
                                // if set to TRUE, we leave the i_mz-loop as we added the whole ID with all hits
 
120
                                bool was_added=false; // was current pep-m/z matched?!
 
121
 
 
122
                                // iterate over m/z values of pepIds
 
123
                                for (Size i_mz = 0; i_mz < mz_values.size(); ++i_mz)
 
124
                                {
 
125
                                        DoubleReal mz_pep = mz_values[i_mz];
 
126
                                        
 
127
                                        // charge states to use for checking:
 
128
                                        IntList current_charges;
 
129
                                        if (!ignore_charge_)
 
130
                                        {
 
131
                                                // if "mz_ref." is "precursor", we have only one m/z value to check,
 
132
                                                // but still one charge state per peptide hit that could match:
 
133
                                                if (mz_values.size() == 1)
 
134
                                                {
 
135
                                                        current_charges = charges;
 
136
                                                }
 
137
                                                else current_charges << charges[i_mz];
 
138
                                                current_charges << 0; // "not specified" always matches
 
139
                                        }
 
140
                                        
 
141
                                        //check if we compare distance from centroid or subelements
 
142
                                        if (!measure_from_subelements)
 
143
                                        {
 
144
                                                if (isMatch_(rt_pep - map[cm_index].getRT(), mz_pep, map[cm_index].getMZ()) && (ignore_charge_ || current_charges.contains(map[cm_index].getCharge())))
 
145
                                                {
 
146
                                                        was_added = true;
 
147
                                                        map[cm_index].getPeptideIdentifications().push_back(ids[i]);
 
148
                                                        ++assigned[i];
 
149
                                                }
 
150
                                        }
 
151
                                        else
 
152
                                        {
 
153
                                                for(ConsensusFeature::HandleSetType::const_iterator it_handle = map[cm_index].getFeatures().begin(); 
 
154
                                                                it_handle != map[cm_index].getFeatures().end(); 
 
155
                                                                ++it_handle)
 
156
                                                {
 
157
                                                        if (isMatch_(rt_pep - it_handle->getRT(), mz_pep, it_handle->getMZ())  && (ignore_charge_ || current_charges.contains(it_handle->getCharge())))
 
158
                                                        {
 
159
                                                                was_added = true;
 
160
                                                                if (mapping[cm_index].count(i) == 0)
 
161
                                                                {
 
162
                                                                        map[cm_index].getPeptideIdentifications().push_back(ids[i]);
 
163
                                                                        ++assigned[i];
 
164
                                                                        mapping[cm_index].insert(i);
 
165
                                                                }
 
166
                                                                break; // we added this peptide already.. no need to check other handles
 
167
                                                        }
 
168
                                                }
 
169
                                                // continue to here
 
170
                                        }
 
171
                                        
 
172
                                        if (was_added) break;
 
173
                                        
 
174
                                } // m/z values to check
 
175
                                
 
176
                                // break to here
 
177
                                        
 
178
                        } // features
 
179
                } // Identifications
 
180
 
 
181
 
 
182
    Size matches_none(0);
 
183
    Size matches_single(0);
 
184
    Size matches_multi(0);
 
185
 
 
186
                //append unassigned peptide identifications
 
187
                for (Size i=0; i<ids.size(); ++i)
 
188
                {
 
189
                        if (assigned[i]==0)
 
190
                        {
 
191
                                map.getUnassignedPeptideIdentifications().push_back(ids[i]);
 
192
        ++matches_none;
 
193
                        }
 
194
      else if (assigned[i]==1)
 
195
      {
 
196
        ++matches_single;
 
197
      }
 
198
      else if (assigned[i]>1)
 
199
      {
 
200
        ++matches_multi;
 
201
      }
 
202
                }
 
203
 
 
204
    //some statistics output
 
205
          LOG_INFO << "Unassigned peptides: " << matches_none << "\n"
 
206
                                           << "Peptides assigned to exactly one feature: " 
 
207
                                           << matches_single << "\n"
 
208
                                           << "Peptides assigned to multiple features: " 
 
209
                                           << matches_multi << std::endl;
 
210
 
 
211
        }
 
212
 
 
213
        DoubleReal IDMapper::getAbsoluteMZTolerance_(const DoubleReal mz) const
 
214
        {
 
215
                if (measure_==MEASURE_PPM)
 
216
                {
 
217
                        return (mz * mz_tolerance_ / 1e6);
 
218
                } 
 
219
                else if (measure_==MEASURE_DA)
 
220
                {
 
221
                        return mz_tolerance_;
 
222
                }
 
223
                throw Exception::InvalidValue(__FILE__, __LINE__, __PRETTY_FUNCTION__, "IDMapper::getAbsoluteTolerance_(): illegal internal state of measure_!", String(measure_));
 
224
        }
 
225
 
 
226
        bool IDMapper::isMatch_(const DoubleReal rt_distance, const DoubleReal mz_theoretical, const DoubleReal mz_observed) const
 
227
        {
 
228
                if (measure_==MEASURE_PPM)
 
229
                {
 
230
                        return (fabs(rt_distance) <= rt_tolerance_) && (fabs((mz_theoretical*1e6-mz_observed*1e6)/mz_theoretical) <= mz_tolerance_);
 
231
                } 
 
232
                else if (measure_==MEASURE_DA)
 
233
                {
 
234
                        return (fabs(rt_distance) <= rt_tolerance_) && (fabs(mz_theoretical-mz_observed) <= mz_tolerance_);
 
235
                }
 
236
                throw Exception::InvalidValue(__FILE__, __LINE__, __PRETTY_FUNCTION__, "IDMapper::getAbsoluteTolerance_(): illegal internal state of measure_!", String(measure_));
 
237
        }
 
238
 
 
239
        void IDMapper::checkHits_(const std::vector<PeptideIdentification>& ids) const
 
240
        {
 
241
                for (Size i=0; i<ids.size(); ++i)
 
242
                {
 
243
                        if (!ids[i].metaValueExists("RT"))
 
244
                        {
 
245
                                throw Exception::MissingInformation(__FILE__,__LINE__,__PRETTY_FUNCTION__, "IDMapper: meta data value 'RT' missing for peptide identification!"); 
 
246
                        }
 
247
                        if (!ids[i].metaValueExists("MZ"))
 
248
                        {
 
249
                                throw Exception::MissingInformation(__FILE__,__LINE__,__PRETTY_FUNCTION__, "IDMapper: meta data value 'MZ' missing for peptide identification!"); 
 
250
                        }
 
251
                }
 
252
        }
 
253
        
 
254
        void IDMapper::getIDDetails_(const PeptideIdentification& id, DoubleReal& rt_pep, DoubleList& mz_values, IntList& charges, bool use_avg_mass) const
 
255
        {
 
256
                mz_values.clear();
 
257
                charges.clear();
 
258
                
 
259
                rt_pep = id.getMetaValue("RT");
 
260
                
 
261
                // collect m/z values of pepId
 
262
                if (param_.getValue("mz_reference") == "precursor")
 
263
                { // use precursor m/z of pepId
 
264
                        mz_values << id.getMetaValue("MZ");
 
265
                }
 
266
 
 
267
                for (vector<PeptideHit>::const_iterator hit_it = id.getHits().begin(); 
 
268
                                 hit_it != id.getHits().end(); ++hit_it)
 
269
                {
 
270
                        Int charge = hit_it->getCharge();
 
271
                        charges << charge;
 
272
 
 
273
                        if (param_.getValue("mz_reference") == "peptide")
 
274
                        { // use mass of each pepHit (assuming H+ adducts)
 
275
                                DoubleReal mass = use_avg_mass ? 
 
276
                                        hit_it->getSequence().getAverageWeight(Residue::Full, charge) : 
 
277
                                        hit_it->getSequence().getMonoWeight(Residue::Full, charge);
 
278
                                
 
279
                                mz_values << mass / (DoubleReal) charge;
 
280
                        }
 
281
                }
 
282
        }
 
283
 
 
284
 
 
285
        void IDMapper::increaseBoundingBox_(DBoundingBox<2>& box)
 
286
        {
 
287
                DPosition<2> sub_min(rt_tolerance_, 
 
288
                                                                                                 getAbsoluteMZTolerance_(box.minPosition().getY())),
 
289
                        add_max(rt_tolerance_, getAbsoluteMZTolerance_(box.maxPosition().getY()));
 
290
 
 
291
                box.setMin(box.minPosition() - sub_min);
 
292
                box.setMax(box.maxPosition() + add_max);
 
293
        }
 
294
 
 
295
 
 
296
        bool IDMapper::checkMassType_(const vector<DataProcessing>& processing) const
 
297
        {
 
298
                bool use_avg_mass = false;
 
299
                String before;
 
300
                for (vector<DataProcessing>::const_iterator proc_it = processing.begin(); 
 
301
                                 proc_it != processing.end(); ++proc_it)
 
302
                {
 
303
                        if (proc_it->getSoftware().getName() == "FeatureFinder")
 
304
                        {
 
305
                                String reported_mz = proc_it->
 
306
                                        getMetaValue("parameter: algorithm:feature:reported_mz");
 
307
                                if (reported_mz.empty()) continue; // parameter info not available
 
308
                                if (!before.empty() && (reported_mz != before))
 
309
                                {
 
310
                                        LOG_WARN << "The m/z values reported for features in the input seem to be of different types (e.g. monoisotopic/average). They will all be compared against monoisotopic peptide masses, but the mapping results may not be meaningful in the end." << endl;
 
311
                                        return false;
 
312
                                }
 
313
                                if (reported_mz == "average")
 
314
                                {
 
315
                                        use_avg_mass = true;
 
316
                                }
 
317
                                else if (reported_mz == "maximum")
 
318
                                {
 
319
                                        LOG_WARN << "For features, m/z values from the highest mass traces are reported. This type of m/z value is not available for peptides, so the comparison has to be done using average peptide masses." << endl;
 
320
                                        use_avg_mass = true;
 
321
                                }
 
322
                                before = reported_mz;
 
323
                        }
 
324
                }
 
325
                return use_avg_mass;
 
326
        }
 
327
 
 
328
 
 
329
} // namespace OpenMS