~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to include/OpenMS/FILTERING/CALIBRATION/InternalCalibration.h

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2013-12-20 11:30:16 UTC
  • mfrom: (5.1.2 sid)
  • Revision ID: package-import@ubuntu.com-20131220113016-wre5g9bteeheq6he
Tags: 1.11.1-3
* remove version number from libbost development package names;
* ensure that AUTHORS is correctly shipped in all packages.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
// -*- mode: C++; tab-width: 2; -*-
2
 
// vi: set ts=2:
3
 
//
4
 
// --------------------------------------------------------------------------
5
 
//                   OpenMS Mass Spectrometry Framework
6
 
// --------------------------------------------------------------------------
7
 
//  Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
8
 
//
9
 
//  This library is free software; you can redistribute it and/or
10
 
//  modify it under the terms of the GNU Lesser General Public
11
 
//  License as published by the Free Software Foundation; either
12
 
//  version 2.1 of the License, or (at your option) any later version.
13
 
//
14
 
//  This library is distributed in the hope that it will be useful,
15
 
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 
//  Lesser General Public License for more details.
18
 
//
19
 
//  You should have received a copy of the GNU Lesser General Public
20
 
//  License along with this library; if not, write to the Free Software
21
 
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
1
// --------------------------------------------------------------------------
 
2
//                   OpenMS -- Open-Source Mass Spectrometry
 
3
// --------------------------------------------------------------------------
 
4
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
 
5
// ETH Zurich, and Freie Universitaet Berlin 2002-2013.
 
6
//
 
7
// This software is released under a three-clause BSD license:
 
8
//  * Redistributions of source code must retain the above copyright
 
9
//    notice, this list of conditions and the following disclaimer.
 
10
//  * Redistributions in binary form must reproduce the above copyright
 
11
//    notice, this list of conditions and the following disclaimer in the
 
12
//    documentation and/or other materials provided with the distribution.
 
13
//  * Neither the name of any author or any participating institution
 
14
//    may be used to endorse or promote products derived from this software
 
15
//    without specific prior written permission.
 
16
// For a full list of authors, refer to the file AUTHORS.
 
17
// --------------------------------------------------------------------------
 
18
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 
19
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 
20
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 
21
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
 
22
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 
23
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 
24
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 
25
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 
26
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 
27
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 
28
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22
29
//
23
30
// --------------------------------------------------------------------------
24
31
// $Maintainer: Alexandra Zerck $
40
47
 
41
48
namespace OpenMS
42
49
{
43
 
        
 
50
 
44
51
  /**
45
52
     @brief A simple calibration method using linear interpolation of given reference masses.
46
53
 
47
54
     This class implements a simle calibration method: given a list of reference masses,
48
55
     the relative errors of the peaks in the data are approximated by linear interpolation and
49
 
     subtracted from the data. 
50
 
        
51
 
           @htmlinclude OpenMS_InternalCalibration.parameters
52
 
 
53
 
           @ingroup SignalProcessing
 
56
     subtracted from the data.
 
57
 
 
58
       @htmlinclude OpenMS_InternalCalibration.parameters
 
59
 
 
60
       @ingroup SignalProcessing
54
61
  */
55
 
  class OPENMS_DLLAPI InternalCalibration 
56
 
        : public DefaultParamHandler, 
57
 
                public ProgressLogger
 
62
  class OPENMS_DLLAPI InternalCalibration :
 
63
    public DefaultParamHandler,
 
64
    public ProgressLogger
58
65
  {
59
 
  public:
 
66
public:
60
67
    /// Default constructor
61
68
    InternalCalibration();
62
69
 
63
 
                /// Destructor
 
70
    /// Destructor
64
71
    ~InternalCalibration(){}
65
72
 
66
 
        /**
 
73
    /**
67
74
     @brief Calibrate a peak map using given reference masses with a separate calibration function for each spectrum.
68
 
     
69
 
                 The calibration function is calculated for each spectrum
70
 
                 separately. If not enough reference masses are found for a spectrum it is left uncalibrated.
 
75
 
 
76
         The calibration function is calculated for each spectrum
 
77
         separately. If not enough reference masses are found for a spectrum it is left uncalibrated.
71
78
     For the matching of the reference masses and the peaks, the parameter mz_tolerance is used to
72
79
     calculate a window around the reference masses. If more than one peak is found within this window the
73
80
     closest peak is taken.
74
81
     @param exp the uncalibrated peak map
75
82
     @param calibrated_exp the calibrated peak map
76
83
     @param ref_masses the reference m/z values
77
 
        */              
78
 
    template<typename InputPeakType>
79
 
    void calibrateMapSpectrumwise(const MSExperiment<InputPeakType>& exp,MSExperiment<InputPeakType>& calibrated_exp, std::vector<DoubleReal>& ref_masses);
 
84
    */
 
85
    template <typename InputPeakType>
 
86
    void calibrateMapSpectrumwise(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<DoubleReal> & ref_masses);
80
87
 
81
 
        /**
 
88
    /**
82
89
     @brief Calibrate a peak map using given reference masses with one calibration function for the whole map.
83
 
     
84
 
                 The calibration function is calculated for the whole map.
 
90
 
 
91
         The calibration function is calculated for the whole map.
85
92
     For the matching of the reference masses and the peaks the parameter mz_tolerance is used to
86
93
     calculate a window around the reference masses. If more than one peak is found within this window the
87
94
     closest peak is taken.
89
96
     @param calibrated_exp the calibrated peak map
90
97
     @param ref_masses the reference m/z values
91
98
     @param trafo_file_name file where the transformation function of the calibration is stored
92
 
        */              
93
 
    template<typename InputPeakType>
94
 
    void calibrateMapGlobally(const MSExperiment<InputPeakType>& exp,MSExperiment<InputPeakType>& calibrated_exp, std::vector<DoubleReal>& ref_masses,String trafo_file_name = "");
 
99
    */
 
100
    template <typename InputPeakType>
 
101
    void calibrateMapGlobally(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<DoubleReal> & ref_masses, String trafo_file_name = "");
95
102
 
96
 
        /**
 
103
    /**
97
104
     @brief Calibrate a peak map using given reference ids with one calibration function for the whole map.
98
 
     
99
 
                 Calibrate a map using given peptide identifications. The calibration function is calculated for the whole map.
 
105
 
 
106
         Calibrate a map using given peptide identifications. The calibration function is calculated for the whole map.
100
107
     The m/z-values of the reference identifications are calculated through the given sequence and charge of the peptide.
101
108
     For the matching of the reference masses and the peaks the parameter mz_tolerance is used to
102
109
     calculate a window around the reference masses. If more than one peak is found within this window the
105
112
     @param calibrated_exp the calibrated peak map
106
113
     @param ref_ids the reference peptide identifications
107
114
     @param trafo_file_name file where the transformation function of the calibration is stored
108
 
        */
109
 
    template<typename InputPeakType>
110
 
    void calibrateMapGlobally(const MSExperiment<InputPeakType>& exp, MSExperiment<InputPeakType>& calibrated_exp,std::vector<PeptideIdentification>& ref_ids,String trafo_file_name = "");
 
115
    */
 
116
    template <typename InputPeakType>
 
117
    void calibrateMapGlobally(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name = "");
111
118
 
112
 
        /**
 
119
    /**
113
120
     @brief Calibrate an annotated feature map with one calibration function for the whole map.
114
 
     
115
 
                 Calibrate an annotated (!) feature map using the features' identifications. The calibration function is calculated for the whole map. The m/z-values of the reference identifications are calculated through the given sequence and charge of the peptide.
 
121
 
 
122
         Calibrate an annotated (!) feature map using the features' identifications. The calibration function is calculated for the whole map. The m/z-values of the reference identifications are calculated through the given sequence and charge of the peptide.
116
123
     @param feature_map the uncalibrated feature map (annotated with peptide ids)
117
124
     @param calibrated_feature_map the calibrated feature map
118
125
     @param trafo_file_name file where the transformation function of the calibration is stored
119
 
        */
120
 
    void calibrateMapGlobally(const FeatureMap<>& feature_map, FeatureMap<>& calibrated_feature_map,String trafo_file_name = "");
 
126
    */
 
127
    void calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map, String trafo_file_name = "");
121
128
 
122
 
        /**
 
129
    /**
123
130
     @brief Calibrate a feature map using given reference ids with one calibration function for the whole map.
124
 
     
125
 
                 Calibrate a feature map using given peptide identifications. The calibration function is calculated for the whole map. Even if the features are already annotated with peptide ids these annotations are ignored for the calibration, only the reference ids are used. The m/z-values of the reference identifications are calculated through the given sequence and charge of the peptide. The reference ids are mapped onto the FeatureMap using IDMapper with the mz_tolerance and rt_tolerance parameters.
 
131
 
 
132
         Calibrate a feature map using given peptide identifications. The calibration function is calculated for the whole map. Even if the features are already annotated with peptide ids these annotations are ignored for the calibration, only the reference ids are used. The m/z-values of the reference identifications are calculated through the given sequence and charge of the peptide. The reference ids are mapped onto the FeatureMap using IDMapper with the mz_tolerance and rt_tolerance parameters.
126
133
     @param feature_map the uncalibrated feature map
127
134
     @param calibrated_feature_map the calibrated feature map
128
135
     @param ref_ids the reference peptide identifications
129
136
     @param trafo_file_name file where the transformation function of the calibration is stored
130
 
        */
131
 
    void calibrateMapGlobally(const FeatureMap<>& feature_map, FeatureMap<>& calibrated_feature_map,std::vector<PeptideIdentification>& ref_ids,String trafo_file_name = "");
132
 
 
133
 
 
134
 
                
135
 
    template<typename InputPeakType>
136
 
        void calibrateMapList(std::vector<MSExperiment<InputPeakType> >& exp_list,std::vector<MSExperiment<InputPeakType> >& calibrated_exp_list, std::vector<DoubleReal>& ref_masses, std::vector<DoubleReal>& detected_background_masses);
137
 
 
138
 
 
139
 
  protected:
140
 
 
141
 
        /// the actual calibration function
142
 
        void makeLinearRegression_(std::vector<DoubleReal>& observed_masses, std::vector<DoubleReal>& theoretical_masses);
143
 
                
144
 
        /// check if reference ids contain RT and MZ information as meta values
145
 
        void checkReferenceIds_(std::vector<PeptideIdentification>& pep_ids);
146
 
                
147
 
        /// check if reference ids contain RT and MZ information as meta values
148
 
        void checkReferenceIds_(const FeatureMap<>& feature_map);
149
 
                
150
 
        /// apply transformation to all features (including subordinates and convex hulls)
151
 
  void applyTransformation_(const FeatureMap<>& feature_map,FeatureMap<>& calibrated_feature_map);
152
 
 
153
 
        /// here the transformation is stored
154
 
        TransformationDescription trafo_;
155
 
  };// class InternalCalibration
156
 
 
157
 
 
158
 
  template<typename InputPeakType>
159
 
  void InternalCalibration::calibrateMapSpectrumwise(const MSExperiment<InputPeakType>& exp, MSExperiment<InputPeakType>& calibrated_exp,std::vector<DoubleReal>& ref_masses)
160
 
  {
161
 
#ifdef DEBUG_CALIBRATION
162
 
                std::cout.precision(writtenDigits<DoubleReal>());
163
 
#endif
164
 
                if(exp.empty())
165
 
                        {
166
 
                                std::cout << "Input is empty."<<std::endl;
167
 
                                return;
168
 
                        }
169
 
                
170
 
                if(exp[0].getType() != SpectrumSettings::PEAKS)
171
 
                        {
172
 
                                std::cout << "Attention: this function is assuming peak data."<<std::endl;
173
 
                        }
174
 
                calibrated_exp = exp;
175
 
                
176
 
    Size num_ref_peaks = ref_masses.size();
177
 
    bool use_ppm = (param_.getValue("mz_tolerance_unit") == "ppm" ) ? true : false;
178
 
                DoubleReal mz_tol = param_.getValue("mz_tolerance");
179
 
    startProgress(0,exp.size(),"calibrate spectra");    
180
 
    // for each spectrum
181
 
    for(Size spec=0;spec <  exp.size(); ++spec)
182
 
      {
183
 
                                // calibrate only MS1 spectra
184
 
                                if(exp[spec].getMSLevel() != 1)
185
 
                                        {
186
 
                                                continue;
187
 
                                        }
188
 
                                
189
 
                                
190
 
                                std::vector<DoubleReal> corr_masses,rel_errors,found_ref_masses;
191
 
                                UInt corr_peaks=0;
192
 
                                for(Size peak=0;peak <  exp[spec].size(); ++peak)
193
 
                                        {
194
 
                                                for(Size ref_peak=0; ref_peak < num_ref_peaks;++ref_peak)
195
 
                                                        {
196
 
                                                                        if(!use_ppm &&  fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) <  mz_tol)
197
 
                                                                        {
198
 
                                                                                found_ref_masses.push_back(ref_masses[ref_peak]);
199
 
                                                                                corr_masses.push_back(exp[spec][peak].getMZ());
200
 
                                                                                ++corr_peaks;
201
 
                                                                                break;
202
 
                                                                        }
203
 
                                                                        else if(use_ppm &&  fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) / ref_masses[ref_peak] * 1e6<  mz_tol)
204
 
                                                                        {
205
 
                                                                                found_ref_masses.push_back(ref_masses[ref_peak]);
206
 
                                                                                corr_masses.push_back(exp[spec][peak].getMZ());
207
 
                                                                                ++corr_peaks;
208
 
                                                                                break;
209
 
                                                                        }
210
 
                                                        }
211
 
                                        }
212
 
                                if(corr_peaks < 2)
213
 
                                        {
214
 
                                                std::cout << "spec: "<<spec
215
 
                                                                                        << " less than 2 reference masses were detected within a reasonable error range\n";
216
 
                                                std::cout << "This spectrum cannot be calibrated!\n";
217
 
                                                continue;
218
 
                                        }
219
 
                                
220
 
                                // determine rel error in ppm for the two reference masses
221
 
                                for(Size ref_peak=0; ref_peak < found_ref_masses.size();++ref_peak)
222
 
                                        {
223
 
                                                        rel_errors.push_back((found_ref_masses[ref_peak]-corr_masses[ref_peak])/corr_masses[ref_peak] * 1e6);
224
 
                                        }
225
 
 
226
 
                                makeLinearRegression_(corr_masses,found_ref_masses);
227
 
                                
228
 
                                // now calibrate the whole spectrum
229
 
                                for(unsigned int peak=0;peak <  calibrated_exp[spec].size(); ++peak)
230
 
                                        {
231
 
#ifdef DEBUG_CALIBRATION
232
 
                                                        std::cout << calibrated_exp[spec][peak].getMZ()<< "\t";
233
 
#endif
234
 
                                                        DoubleReal mz = calibrated_exp[spec][peak].getMZ();
235
 
                                                        mz = trafo_.apply(mz);
236
 
                                                        calibrated_exp[spec][peak].setMZ(mz);
237
 
#ifdef DEBUG_CALIBRATION
238
 
                                                std::cout       << calibrated_exp[spec][peak].getMZ()<< std::endl;
239
 
#endif
240
 
 
241
 
                                        }
242
 
                                setProgress(spec);
243
 
      }// for(Size spec=0;spec <  exp.size(); ++spec)
244
 
                endProgress();
245
 
        }
246
 
 
247
 
         
248
 
  template<typename InputPeakType>
249
 
  void InternalCalibration::calibrateMapGlobally(const MSExperiment<InputPeakType>& exp,
250
 
                                                                                                                                                                                                 MSExperiment<InputPeakType>& calibrated_exp,
251
 
                                                                                                                                                                                                 std::vector<PeptideIdentification>& ref_ids,String trafo_file_name)
252
 
        {
253
 
                bool use_ppm = param_.getValue("mz_tolerance_unit") == "ppm" ? true : false;
254
 
                DoubleReal mz_tolerance = param_.getValue("mz_tolerance");
255
 
                if(exp.empty())
256
 
                        {
257
 
                                std::cout << "Input is empty."<<std::endl;
258
 
                                return;
259
 
                        }
260
 
                
261
 
                if(exp[0].getType() != SpectrumSettings::PEAKS)
262
 
                        {
263
 
                                std::cout << "Attention: this function is assuming peak data."<<std::endl;
264
 
                        }
265
 
                // check if the ids contain meta information about the peak positions
266
 
                checkReferenceIds_(ref_ids);
267
 
 
268
 
                std::vector<DoubleReal> theoretical_masses,observed_masses;
269
 
                for(Size p_id = 0; p_id < ref_ids.size();++p_id)
270
 
                        {
271
 
                                for(Size p_h = 0; p_h < ref_ids[p_id].getHits().size();++p_h)
272
 
                                        {
273
 
                                                Int charge = ref_ids[p_id].getHits()[p_h].getCharge();
274
 
                                                DoubleReal theo_mass = ref_ids[p_id].getHits()[p_h].getSequence().getMonoWeight(Residue::Full,charge)/(DoubleReal)charge;
275
 
                                                // first find corresponding ms1-spectrum
276
 
                                                typename MSExperiment<InputPeakType>::ConstIterator rt_iter = exp.RTBegin(ref_ids[p_id].getMetaValue("RT"));
277
 
                                                while(rt_iter != exp.begin() && rt_iter->getMSLevel() != 1) 
278
 
                                                        {
279
 
                                                                --rt_iter;
280
 
                                                        }
281
 
                                                // now find closest peak
282
 
                                                typename MSSpectrum<InputPeakType>::ConstIterator mz_iter = rt_iter->MZBegin(ref_ids[p_id].getMetaValue("MZ"));
283
 
                                                //                                      std::cout << mz_iter->getMZ() <<" "<<(DoubleReal)ref_ids[p_id].getMetaValue("MZ")<<"\t";
284
 
                                                DoubleReal dist = (DoubleReal)ref_ids[p_id].getMetaValue("MZ") - mz_iter->getMZ();
285
 
                                                //                                      std::cout << dist << "\t";
286
 
                                                if((mz_iter +1) != rt_iter->end()
287
 
                                                         && fabs((mz_iter +1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < fabs(dist)
288
 
                                                         && mz_iter != rt_iter->begin()
289
 
                                                         && fabs((mz_iter -1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < fabs((mz_iter +1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ"))) // if mz_iter +1 has smaller dist than mz_iter and mz_iter-1
290
 
                                                        {
291
 
                                                                if((use_ppm &&
292
 
                                                                         fabs((mz_iter +1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) / (DoubleReal)ref_ids[p_id].getMetaValue("MZ") *1e06< mz_tolerance) ||
293
 
                                                                         (!use_ppm && fabs((mz_iter+1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < mz_tolerance))
294
 
                                                                        {
295
 
                                                                                //              std::cout <<(mz_iter +1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")<<"\t";
296
 
                                                                                observed_masses.push_back((mz_iter +1)->getMZ());
297
 
                                                                                theoretical_masses.push_back(theo_mass);
298
 
                                                                                //                                                                      std::cout << (mz_iter +1)->getMZ() << " ~ "<<theo_mass << " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
299
 
                                                                                //                      << "\tplus 1"<< std::endl;
300
 
                                                                        }
301
 
                                                        }
302
 
                                                else if(mz_iter != rt_iter->begin()
303
 
                                                                                && fabs((mz_iter -1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < fabs(dist)) // if mz_iter-1 has smaller dist than mz_iter
304
 
                                                        {
305
 
                                                                if((use_ppm &&
306
 
                                                                         fabs((mz_iter -1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) / (DoubleReal)ref_ids[p_id].getMetaValue("MZ") *1e06< mz_tolerance) ||
307
 
                                                                         (!use_ppm && fabs((mz_iter-1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < mz_tolerance))
308
 
                                                                        {
309
 
                                                                                //                                                                      std::cout <<(mz_iter -1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")<<"\t";
310
 
                                                                                observed_masses.push_back((mz_iter -1)->getMZ());
311
 
                                                                                theoretical_masses.push_back(theo_mass);
312
 
                                                                                //                                                                      std::cout << (mz_iter -1)->getMZ() << " ~ "<<theo_mass << " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
313
 
                                                                                //                      << "\tminus 1"<< std::endl;
314
 
                                                                        }
315
 
                                                        }
316
 
                                                else
317
 
                                                        {
318
 
                                                                if((use_ppm &&
319
 
                                                                                fabs((mz_iter)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) / (DoubleReal)ref_ids[p_id].getMetaValue("MZ") *1e06< mz_tolerance) ||
320
 
                                                                         (!use_ppm && fabs((mz_iter)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < mz_tolerance))
321
 
                                                                        {
322
 
                                                                                
323
 
                                                                                observed_masses.push_back(mz_iter->getMZ());
324
 
                                                                                theoretical_masses.push_back(theo_mass);
325
 
//                                                                              std::cout <<"\t"<< mz_iter->getMZ() << " ~ "<<theo_mass<< " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
326
 
//                                                                                                                      << "\tat mz_iter"<< std::endl;
327
 
                                                                        }
328
 
                                                        }
329
 
                                        }
330
 
                        }
331
 
 
332
 
                makeLinearRegression_(observed_masses,theoretical_masses);
333
 
                static_cast<ExperimentalSettings&>(calibrated_exp) = exp;
334
 
                calibrated_exp.resize(exp.size());
335
 
 
336
 
                // for each spectrum
337
 
                for(Size spec=0;spec <  exp.size(); ++spec)
338
 
      {
339
 
                                // calibrate only MS1 spectra
340
 
                                if(exp[spec].getMSLevel() != 1)
341
 
                                        {
342
 
                                                calibrated_exp[spec] = exp[spec];
343
 
                                                continue;
344
 
                                        }
345
 
                                // copy the spectrum meta data
346
 
                                calibrated_exp[spec] = exp[spec];
347
 
 
348
 
                                for(unsigned int peak=0;peak <  exp[spec].size(); ++peak)
349
 
                                        {
350
 
#ifdef DEBUG_CALIBRATION
351
 
                                                std::cout << exp[spec][peak].getMZ()<< "\t";
352
 
#endif
353
 
                                                DoubleReal mz = exp[spec][peak].getMZ();
354
 
                                                mz = trafo_.apply(mz);
355
 
                                                calibrated_exp[spec][peak].setMZ(mz);
356
 
#ifdef DEBUG_CALIBRATION
357
 
                                                std::cout << calibrated_exp[spec][peak].getMZ()<< std::endl;
358
 
#endif
359
 
 
360
 
                                        }
361
 
      }// for(Size spec=0;spec <  exp.size(); ++spec)
362
 
                if(trafo_file_name != "")
363
 
                        {
364
 
                                TransformationXMLFile().store(trafo_file_name,trafo_);
365
 
                        }
366
 
        }
367
 
 
368
 
 
369
 
        template<typename InputPeakType>
370
 
  void InternalCalibration::calibrateMapGlobally(const MSExperiment<InputPeakType>& exp, MSExperiment<InputPeakType>& calibrated_exp,std::vector<DoubleReal>& ref_masses,String trafo_file_name)
371
 
        {
372
 
                if(exp.empty())
373
 
                        {
374
 
                                std::cout << "Input is empty."<<std::endl;
375
 
                                return;
376
 
                        }
377
 
                        
378
 
                if(exp[0].getType() != SpectrumSettings::PEAKS)
379
 
                        {
380
 
                                std::cout << "Attention: this function is assuming peak data."<<std::endl;
381
 
                        }
382
 
 
383
 
 
384
 
    Size num_ref_peaks = ref_masses.size();
385
 
    bool use_ppm = (param_.getValue("mz_tolerance_unit") == "ppm" ) ? true : false;
386
 
                DoubleReal mz_tol = param_.getValue("mz_tolerance");
387
 
    startProgress(0,exp.size(),"calibrate spectra");    
388
 
                std::vector<DoubleReal> corr_masses,rel_errors,found_ref_masses;
389
 
                UInt corr_peaks=0;
390
 
    // for each spectrum
391
 
    for(Size spec=0;spec <  exp.size(); ++spec)
392
 
      {
393
 
        // calibrate only MS1 spectra
394
 
                                if(exp[spec].getMSLevel() != 1) continue;
395
 
                                for(Size peak=0;peak <  exp[spec].size(); ++peak)
396
 
                                        {
397
 
                                                for(Size ref_peak=0; ref_peak < num_ref_peaks;++ref_peak)
398
 
                                                        {
399
 
                                                                if(!use_ppm &&  fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) <  mz_tol)
400
 
                                                                        {
401
 
                                                                                found_ref_masses.push_back(ref_masses[ref_peak]);
402
 
                                                                                corr_masses.push_back(exp[spec][peak].getMZ());
403
 
                                                                                ++corr_peaks;
404
 
                                                                                break;
405
 
                                                                        }
406
 
                                                                else if(use_ppm &&  fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) / ref_masses[ref_peak] * 1e6<  mz_tol)
407
 
                                                                        {
408
 
                                                                                found_ref_masses.push_back(ref_masses[ref_peak]);
409
 
                                                                                corr_masses.push_back(exp[spec][peak].getMZ());
410
 
                                                                                ++corr_peaks;
411
 
                                                                                break;
412
 
                                                                        }
413
 
                                                        }
414
 
                                        }
415
 
                        }
416
 
                if(corr_peaks < 2)
417
 
                        {
418
 
                                std::cout << "Less than 2 reference masses were detected within a reasonable error range\n";
419
 
                                std::cout << "This spectrum cannot be calibrated!\n";
420
 
                                return;
421
 
                        }
422
 
                        
423
 
                // calculate the (linear) calibration function
424
 
                makeLinearRegression_(corr_masses,found_ref_masses);
425
 
                static_cast<ExperimentalSettings&>(calibrated_exp) = exp;
426
 
                calibrated_exp.resize(exp.size());
427
 
    
428
 
                // apply the calibration function to each peak
429
 
                for(Size spec=0;spec <  exp.size(); ++spec)
430
 
      {
431
 
                                // calibrate only MS1 spectra
432
 
                                if(exp[spec].getMSLevel() != 1)
433
 
                                        {
434
 
                                                calibrated_exp[spec] = exp[spec];
435
 
                                                continue;
436
 
                                        }
437
 
 
438
 
                                // copy the spectrum data
439
 
                                calibrated_exp[spec] = exp[spec];
440
 
 
441
 
                                for(unsigned int peak=0;peak <  exp[spec].size(); ++peak)
442
 
                                        {
443
 
#ifdef DEBUG_CALIBRATION
444
 
                                                        std::cout << exp[spec][peak].getMZ()<< "\t";                                                                                    
445
 
#endif
446
 
                                                        DoubleReal mz = exp[spec][peak].getMZ();
447
 
                                                        mz = trafo_.apply(mz);
448
 
                                                        calibrated_exp[spec][peak].setMZ(mz);
449
 
 
450
 
#ifdef DEBUG_CALIBRATION
451
 
                                                std::cout << calibrated_exp[spec][peak].getMZ() << std::endl;
452
 
#endif
453
 
 
454
 
                                        }
455
 
                                setProgress(spec);
456
 
      }// for(Size spec=0;spec <  exp.size(); ++spec)
457
 
                endProgress();
458
 
                if(trafo_file_name != "")
459
 
                        {
460
 
                                TransformationXMLFile().store(trafo_file_name,trafo_);
461
 
                        }
462
 
        }
463
 
 
 
137
    */
 
138
    void calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name = "");
 
139
 
 
140
 
 
141
 
 
142
    template <typename InputPeakType>
 
143
    void calibrateMapList(std::vector<MSExperiment<InputPeakType> > & exp_list, std::vector<MSExperiment<InputPeakType> > & calibrated_exp_list, std::vector<DoubleReal> & ref_masses, std::vector<DoubleReal> & detected_background_masses);
 
144
 
 
145
 
 
146
protected:
 
147
 
 
148
    /// the actual calibration function
 
149
    void makeLinearRegression_(std::vector<DoubleReal> & observed_masses, std::vector<DoubleReal> & theoretical_masses);
 
150
 
 
151
    /// check if reference ids contain RT and MZ information as meta values
 
152
    void checkReferenceIds_(std::vector<PeptideIdentification> & pep_ids);
 
153
 
 
154
    /// check if reference ids contain RT and MZ information as meta values
 
155
    void checkReferenceIds_(const FeatureMap<> & feature_map);
 
156
 
 
157
    /// apply transformation to all features (including subordinates and convex hulls)
 
158
    void applyTransformation_(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map);
 
159
 
 
160
    /// here the transformation is stored
 
161
    TransformationDescription trafo_;
 
162
  }; // class InternalCalibration
 
163
 
 
164
 
 
165
  template <typename InputPeakType>
 
166
  void InternalCalibration::calibrateMapSpectrumwise(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<DoubleReal> & ref_masses)
 
167
  {
 
168
#ifdef DEBUG_CALIBRATION
 
169
    std::cout.precision(writtenDigits<DoubleReal>());
 
170
#endif
 
171
    if (exp.empty())
 
172
    {
 
173
      std::cout << "Input is empty." << std::endl;
 
174
      return;
 
175
    }
 
176
 
 
177
    if (exp[0].getType() != SpectrumSettings::PEAKS)
 
178
    {
 
179
      std::cout << "Attention: this function is assuming peak data." << std::endl;
 
180
    }
 
181
    calibrated_exp = exp;
 
182
 
 
183
    Size num_ref_peaks = ref_masses.size();
 
184
    bool use_ppm = (param_.getValue("mz_tolerance_unit") == "ppm") ? true : false;
 
185
    DoubleReal mz_tol = param_.getValue("mz_tolerance");
 
186
    startProgress(0, exp.size(), "calibrate spectra");
 
187
    // for each spectrum
 
188
    for (Size spec = 0; spec <  exp.size(); ++spec)
 
189
    {
 
190
      // calibrate only MS1 spectra
 
191
      if (exp[spec].getMSLevel() != 1)
 
192
      {
 
193
        continue;
 
194
      }
 
195
 
 
196
 
 
197
      std::vector<DoubleReal> corr_masses, rel_errors, found_ref_masses;
 
198
      UInt corr_peaks = 0;
 
199
      for (Size peak = 0; peak <  exp[spec].size(); ++peak)
 
200
      {
 
201
        for (Size ref_peak = 0; ref_peak < num_ref_peaks; ++ref_peak)
 
202
        {
 
203
          if (!use_ppm &&  fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) <  mz_tol)
 
204
          {
 
205
            found_ref_masses.push_back(ref_masses[ref_peak]);
 
206
            corr_masses.push_back(exp[spec][peak].getMZ());
 
207
            ++corr_peaks;
 
208
            break;
 
209
          }
 
210
          else if (use_ppm &&  fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) / ref_masses[ref_peak] * 1e6 <  mz_tol)
 
211
          {
 
212
            found_ref_masses.push_back(ref_masses[ref_peak]);
 
213
            corr_masses.push_back(exp[spec][peak].getMZ());
 
214
            ++corr_peaks;
 
215
            break;
 
216
          }
 
217
        }
 
218
      }
 
219
      if (corr_peaks < 2)
 
220
      {
 
221
        std::cout << "spec: " << spec
 
222
                  << " less than 2 reference masses were detected within a reasonable error range\n";
 
223
        std::cout << "This spectrum cannot be calibrated!\n";
 
224
        continue;
 
225
      }
 
226
 
 
227
      // determine rel error in ppm for the two reference masses
 
228
      for (Size ref_peak = 0; ref_peak < found_ref_masses.size(); ++ref_peak)
 
229
      {
 
230
        rel_errors.push_back((found_ref_masses[ref_peak] - corr_masses[ref_peak]) / corr_masses[ref_peak] * 1e6);
 
231
      }
 
232
 
 
233
      makeLinearRegression_(corr_masses, found_ref_masses);
 
234
 
 
235
      // now calibrate the whole spectrum
 
236
      for (unsigned int peak = 0; peak <  calibrated_exp[spec].size(); ++peak)
 
237
      {
 
238
#ifdef DEBUG_CALIBRATION
 
239
        std::cout << calibrated_exp[spec][peak].getMZ() << "\t";
 
240
#endif
 
241
        DoubleReal mz = calibrated_exp[spec][peak].getMZ();
 
242
        mz = trafo_.apply(mz);
 
243
        calibrated_exp[spec][peak].setMZ(mz);
 
244
#ifdef DEBUG_CALIBRATION
 
245
        std::cout << calibrated_exp[spec][peak].getMZ() << std::endl;
 
246
#endif
 
247
 
 
248
      }
 
249
      setProgress(spec);
 
250
    }  // for(Size spec=0;spec <  exp.size(); ++spec)
 
251
    endProgress();
 
252
  }
 
253
 
 
254
  template <typename InputPeakType>
 
255
  void InternalCalibration::calibrateMapGlobally(const MSExperiment<InputPeakType> & exp,
 
256
                                                 MSExperiment<InputPeakType> & calibrated_exp,
 
257
                                                 std::vector<PeptideIdentification> & ref_ids, String trafo_file_name)
 
258
  {
 
259
    bool use_ppm = param_.getValue("mz_tolerance_unit") == "ppm" ? true : false;
 
260
    DoubleReal mz_tolerance = param_.getValue("mz_tolerance");
 
261
    if (exp.empty())
 
262
    {
 
263
      std::cout << "Input is empty." << std::endl;
 
264
      return;
 
265
    }
 
266
 
 
267
    if (exp[0].getType() != SpectrumSettings::PEAKS)
 
268
    {
 
269
      std::cout << "Attention: this function is assuming peak data." << std::endl;
 
270
    }
 
271
    // check if the ids contain meta information about the peak positions
 
272
    checkReferenceIds_(ref_ids);
 
273
 
 
274
    std::vector<DoubleReal> theoretical_masses, observed_masses;
 
275
    for (Size p_id = 0; p_id < ref_ids.size(); ++p_id)
 
276
    {
 
277
      for (Size p_h = 0; p_h < ref_ids[p_id].getHits().size(); ++p_h)
 
278
      {
 
279
        Int charge = ref_ids[p_id].getHits()[p_h].getCharge();
 
280
        DoubleReal theo_mass = ref_ids[p_id].getHits()[p_h].getSequence().getMonoWeight(Residue::Full, charge) / (DoubleReal)charge;
 
281
        // first find corresponding ms1-spectrum
 
282
        typename MSExperiment<InputPeakType>::ConstIterator rt_iter = exp.RTBegin(ref_ids[p_id].getMetaValue("RT"));
 
283
        while (rt_iter != exp.begin() && rt_iter->getMSLevel() != 1)
 
284
        {
 
285
          --rt_iter;
 
286
        }
 
287
        // now find closest peak
 
288
        typename MSSpectrum<InputPeakType>::ConstIterator mz_iter = rt_iter->MZBegin(ref_ids[p_id].getMetaValue("MZ"));
 
289
        //std::cout << mz_iter->getMZ() <<" "<<(DoubleReal)ref_ids[p_id].getMetaValue("MZ")<<"\t";
 
290
        DoubleReal dist = (DoubleReal)ref_ids[p_id].getMetaValue("MZ") - mz_iter->getMZ();
 
291
        //std::cout << dist << "\t";
 
292
        if ((mz_iter + 1) != rt_iter->end()
 
293
           && fabs((mz_iter + 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < fabs(dist)
 
294
           && mz_iter != rt_iter->begin()
 
295
           && fabs((mz_iter - 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < fabs((mz_iter + 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")))                 // if mz_iter +1 has smaller dist than mz_iter and mz_iter-1
 
296
        {
 
297
          if ((use_ppm &&
 
298
               fabs((mz_iter + 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) / (DoubleReal)ref_ids[p_id].getMetaValue("MZ") * 1e06 < mz_tolerance) ||
 
299
              (!use_ppm && fabs((mz_iter + 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < mz_tolerance))
 
300
          {
 
301
            //std::cout <<(mz_iter +1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")<<"\t";
 
302
            observed_masses.push_back((mz_iter + 1)->getMZ());
 
303
            theoretical_masses.push_back(theo_mass);
 
304
            //std::cout << (mz_iter +1)->getMZ() << " ~ "<<theo_mass << " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
 
305
            //<< "\tplus 1"<< std::endl;
 
306
          }
 
307
        }
 
308
        else if (mz_iter != rt_iter->begin()
 
309
                && fabs((mz_iter - 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < fabs(dist))                        // if mz_iter-1 has smaller dist than mz_iter
 
310
        {
 
311
          if ((use_ppm &&
 
312
               fabs((mz_iter - 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) / (DoubleReal)ref_ids[p_id].getMetaValue("MZ") * 1e06 < mz_tolerance) ||
 
313
              (!use_ppm && fabs((mz_iter - 1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < mz_tolerance))
 
314
          {
 
315
            //std::cout <<(mz_iter -1)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")<<"\t";
 
316
            observed_masses.push_back((mz_iter - 1)->getMZ());
 
317
            theoretical_masses.push_back(theo_mass);
 
318
            //std::cout << (mz_iter -1)->getMZ() << " ~ "<<theo_mass << " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
 
319
            //<< "\tminus 1"<< std::endl;
 
320
          }
 
321
        }
 
322
        else
 
323
        {
 
324
          if ((use_ppm &&
 
325
               fabs((mz_iter)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) / (DoubleReal)ref_ids[p_id].getMetaValue("MZ") * 1e06 < mz_tolerance) ||
 
326
              (!use_ppm && fabs((mz_iter)->getMZ() - (DoubleReal)ref_ids[p_id].getMetaValue("MZ")) < mz_tolerance))
 
327
          {
 
328
 
 
329
            observed_masses.push_back(mz_iter->getMZ());
 
330
            theoretical_masses.push_back(theo_mass);
 
331
//                                      std::cout <<"\t"<< mz_iter->getMZ() << " ~ "<<theo_mass<< " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
 
332
//                                                          << "\tat mz_iter"<< std::endl;
 
333
          }
 
334
        }
 
335
      }
 
336
    }
 
337
 
 
338
    makeLinearRegression_(observed_masses, theoretical_masses);
 
339
    static_cast<ExperimentalSettings &>(calibrated_exp) = exp;
 
340
    calibrated_exp.resize(exp.size());
 
341
 
 
342
    // for each spectrum
 
343
    for (Size spec = 0; spec <  exp.size(); ++spec)
 
344
    {
 
345
      // calibrate only MS1 spectra
 
346
      if (exp[spec].getMSLevel() != 1)
 
347
      {
 
348
        calibrated_exp[spec] = exp[spec];
 
349
        continue;
 
350
      }
 
351
      // copy the spectrum meta data
 
352
      calibrated_exp[spec] = exp[spec];
 
353
 
 
354
      for (unsigned int peak = 0; peak <  exp[spec].size(); ++peak)
 
355
      {
 
356
#ifdef DEBUG_CALIBRATION
 
357
        std::cout << exp[spec][peak].getMZ() << "\t";
 
358
#endif
 
359
        DoubleReal mz = exp[spec][peak].getMZ();
 
360
        mz = trafo_.apply(mz);
 
361
        calibrated_exp[spec][peak].setMZ(mz);
 
362
#ifdef DEBUG_CALIBRATION
 
363
        std::cout << calibrated_exp[spec][peak].getMZ() << std::endl;
 
364
#endif
 
365
 
 
366
      }
 
367
    }  // for(Size spec=0;spec <  exp.size(); ++spec)
 
368
    if (trafo_file_name != "")
 
369
    {
 
370
      TransformationXMLFile().store(trafo_file_name, trafo_);
 
371
    }
 
372
  }
 
373
 
 
374
  template <typename InputPeakType>
 
375
  void InternalCalibration::calibrateMapGlobally(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<DoubleReal> & ref_masses, String trafo_file_name)
 
376
  {
 
377
    if (exp.empty())
 
378
    {
 
379
      std::cout << "Input is empty." << std::endl;
 
380
      return;
 
381
    }
 
382
 
 
383
    if (exp[0].getType() != SpectrumSettings::PEAKS)
 
384
    {
 
385
      std::cout << "Attention: this function is assuming peak data." << std::endl;
 
386
    }
 
387
 
 
388
 
 
389
    Size num_ref_peaks = ref_masses.size();
 
390
    bool use_ppm = (param_.getValue("mz_tolerance_unit") == "ppm") ? true : false;
 
391
    DoubleReal mz_tol = param_.getValue("mz_tolerance");
 
392
    startProgress(0, exp.size(), "calibrate spectra");
 
393
    std::vector<DoubleReal> corr_masses, rel_errors, found_ref_masses;
 
394
    UInt corr_peaks = 0;
 
395
    // for each spectrum
 
396
    for (Size spec = 0; spec <  exp.size(); ++spec)
 
397
    {
 
398
      // calibrate only MS1 spectra
 
399
      if (exp[spec].getMSLevel() != 1)
 
400
        continue;
 
401
      for (Size peak = 0; peak <  exp[spec].size(); ++peak)
 
402
      {
 
403
        for (Size ref_peak = 0; ref_peak < num_ref_peaks; ++ref_peak)
 
404
        {
 
405
          if (!use_ppm &&  fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) <  mz_tol)
 
406
          {
 
407
            found_ref_masses.push_back(ref_masses[ref_peak]);
 
408
            corr_masses.push_back(exp[spec][peak].getMZ());
 
409
            ++corr_peaks;
 
410
            break;
 
411
          }
 
412
          else if (use_ppm &&  fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) / ref_masses[ref_peak] * 1e6 <  mz_tol)
 
413
          {
 
414
            found_ref_masses.push_back(ref_masses[ref_peak]);
 
415
            corr_masses.push_back(exp[spec][peak].getMZ());
 
416
            ++corr_peaks;
 
417
            break;
 
418
          }
 
419
        }
 
420
      }
 
421
    }
 
422
    if (corr_peaks < 2)
 
423
    {
 
424
      std::cout << "Less than 2 reference masses were detected within a reasonable error range\n";
 
425
      std::cout << "This spectrum cannot be calibrated!\n";
 
426
      return;
 
427
    }
 
428
 
 
429
    // calculate the (linear) calibration function
 
430
    makeLinearRegression_(corr_masses, found_ref_masses);
 
431
    static_cast<ExperimentalSettings &>(calibrated_exp) = exp;
 
432
    calibrated_exp.resize(exp.size());
 
433
 
 
434
    // apply the calibration function to each peak
 
435
    for (Size spec = 0; spec <  exp.size(); ++spec)
 
436
    {
 
437
      // calibrate only MS1 spectra
 
438
      if (exp[spec].getMSLevel() != 1)
 
439
      {
 
440
        calibrated_exp[spec] = exp[spec];
 
441
        continue;
 
442
      }
 
443
 
 
444
      // copy the spectrum data
 
445
      calibrated_exp[spec] = exp[spec];
 
446
 
 
447
      for (unsigned int peak = 0; peak <  exp[spec].size(); ++peak)
 
448
      {
 
449
#ifdef DEBUG_CALIBRATION
 
450
        std::cout << exp[spec][peak].getMZ() << "\t";
 
451
#endif
 
452
        DoubleReal mz = exp[spec][peak].getMZ();
 
453
        mz = trafo_.apply(mz);
 
454
        calibrated_exp[spec][peak].setMZ(mz);
 
455
 
 
456
#ifdef DEBUG_CALIBRATION
 
457
        std::cout << calibrated_exp[spec][peak].getMZ() << std::endl;
 
458
#endif
 
459
 
 
460
      }
 
461
      setProgress(spec);
 
462
    }  // for(Size spec=0;spec <  exp.size(); ++spec)
 
463
    endProgress();
 
464
    if (trafo_file_name != "")
 
465
    {
 
466
      TransformationXMLFile().store(trafo_file_name, trafo_);
 
467
    }
 
468
  }
464
469
 
465
470
} // namespace OpenMS
466
471
 
467
472
#endif // OPENMS_FILTERING_CALIBRATION_INTERNALCALIBRATION_H
468