~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to source/TRANSFORMATIONS/FEATUREFINDER/LmaIsotopeFitter1D.C

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2012-11-12 15:58:12 UTC
  • Revision ID: package-import@ubuntu.com-20121112155812-vr15wtg9b50cuesg
Tags: upstream-1.9.0
ImportĀ upstreamĀ versionĀ 1.9.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// -*- mode: C++; tab-width: 2; -*-
 
2
// vi: set ts=2:
 
3
//
 
4
// --------------------------------------------------------------------------
 
5
//                   OpenMS Mass Spectrometry Framework
 
6
// --------------------------------------------------------------------------
 
7
//  Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
 
8
//
 
9
//  This library is free software; you can redistribute it and/or
 
10
//  modify it under the terms of the GNU Lesser General Public
 
11
//  License as published by the Free Software Foundation; either
 
12
//  version 2.1 of the License, or (at your option) any later version.
 
13
//
 
14
//  This library is distributed in the hope that it will be useful,
 
15
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
16
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
17
//  Lesser General Public License for more details.
 
18
//
 
19
//  You should have received a copy of the GNU Lesser General Public
 
20
//  License along with this library; if not, write to the Free Software
 
21
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
22
//
 
23
// --------------------------------------------------------------------------
 
24
// $Maintainer: Clemens Groepl $
 
25
// $Authors: $
 
26
// --------------------------------------------------------------------------
 
27
 
 
28
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/LmaIsotopeFitter1D.h>
 
29
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/InterpolationModel.h>
 
30
#include <OpenMS/MATH/STATISTICS/StatisticFunctions.h>
 
31
#include <OpenMS/CONCEPT/Constants.h>
 
32
 
 
33
#include <numeric>
 
34
#include <boost/math/special_functions/fpclassify.hpp>
 
35
 
 
36
namespace OpenMS
 
37
{
 
38
 
 
39
                LmaIsotopeFitter1D::LmaIsotopeFitter1D()
 
40
    : LevMarqFitter1D()
 
41
    {
 
42
      setName(getProductName());
 
43
                  defaults_.setValue("averagines:C",0.04443989f,"Number of C atoms per Dalton of mass.", StringList::create("advanced"));
 
44
                  defaults_.setValue("averagines:H",0.06981572f,"Number of H atoms per Dalton of mass.", StringList::create("advanced"));
 
45
                  defaults_.setValue("averagines:N",0.01221773f,"Number of N atoms per Dalton of mass.", StringList::create("advanced"));
 
46
                  defaults_.setValue("averagines:O",0.01329399f,"Number of O atoms per Dalton of mass.", StringList::create("advanced"));
 
47
                  defaults_.setValue("averagines:S",0.00037525f,"Number of S atoms per Dalton of mass.", StringList::create("advanced"));
 
48
 
 
49
      defaults_.setValue("isotope:trim_right_cutoff",0.001,"Cutoff in averagine distribution, trailing isotopes below this relative intensity are not considered.", StringList::create("advanced"));
 
50
      defaults_.setValue("isotope:maximum",100,"Maximum isotopic rank to be considered.", StringList::create("advanced"));
 
51
      defaults_.setValue("isotope:distance",1.000495,"Distance between consecutive isotopic peaks.", StringList::create("advanced"));
 
52
      defaults_.setValue("isotope:stdev",0.1,"Standard deviation of gaussian applied to the averagine isotopic pattern to simulate the inaccuracy of the mass spectrometer.", StringList::create("advanced"));
 
53
      defaults_.setValue("charge",1,"Charge state of the model.", StringList::create("advanced"));
 
54
      defaults_.setValue("statistics:mean",0.0,"Centroid m/z (as opposed to monoisotopic m/z).", StringList::create("advanced"));
 
55
      defaults_.setValue("statistics:variance",1.0,"Variance of the model.", StringList::create("advanced"));
 
56
      defaults_.setValue("interpolation_step",0.1,"Sampling rate for the interpolation of the model function.", StringList::create("advanced"));
 
57
      defaults_.setValue("total_intensity",100.0,"Total intensity under the curve in mz dimension.", StringList::create("advanced"));
 
58
      defaults_.setValue("monoisotopic_mass",0.0,"Monoisotopic mz of the model.", StringList::create("advanced"));
 
59
 
 
60
      defaultsToParam_();
 
61
    }
 
62
 
 
63
    LmaIsotopeFitter1D::LmaIsotopeFitter1D(const LmaIsotopeFitter1D& source)
 
64
    : LevMarqFitter1D(source)
 
65
    {
 
66
      updateMembers_();
 
67
    }
 
68
 
 
69
    LmaIsotopeFitter1D::~LmaIsotopeFitter1D()
 
70
    {
 
71
    }
 
72
 
 
73
    LmaIsotopeFitter1D& LmaIsotopeFitter1D::operator = (const LmaIsotopeFitter1D& source)
 
74
    {
 
75
      if (&source == this) return *this;
 
76
 
 
77
      LevMarqFitter1D::operator = (source);
 
78
      updateMembers_();
 
79
 
 
80
      return *this;
 
81
    }
 
82
 
 
83
    Int LmaIsotopeFitter1D::residual_(const gsl_vector* x, void* params, gsl_vector* f)
 
84
    {
 
85
        Size n = static_cast<LmaIsotopeFitter1D::Data*> (params) ->n;
 
86
        RawDataArrayType set = static_cast<LmaIsotopeFitter1D::Data*> (params) ->set;
 
87
        ContainerType isotopes_exact = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_exact;
 
88
        CoordinateType isotope_distance = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotope_distance;
 
89
        CoordinateType sigma = static_cast<LmaIsotopeFitter1D::Data*> (params) ->sigma;
 
90
        //CoordinateType stdev = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_stdev;
 
91
 
 
92
        CoordinateType A = gsl_vector_get( x, 0 );
 
93
        CoordinateType stdev = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_stdev; //gsl_vector_get( x, 1 );
 
94
        CoordinateType mono_mz = gsl_vector_get( x, 1 );
 
95
 
 
96
        CoordinateType Yi = 0.0;
 
97
 
 
98
        // iterate over all points of the signal
 
99
        for ( Size i = 0; i < n; ++i )
 
100
        {
 
101
            CoordinateType m = set[i].getPos();
 
102
 
 
103
            CoordinateType term1 = A/(sqrt(2*Constants::PI)*stdev);
 
104
            CoordinateType termSum = 0;
 
105
            for (Size j=0; j<isotopes_exact.size(); ++j)
 
106
            {
 
107
              termSum += isotopes_exact[j]*exp(-pow(m-mono_mz-j*isotope_distance,2)/(2*stdev*stdev)) ;
 
108
            }
 
109
 
 
110
            Yi = term1*termSum;
 
111
 
 
112
            gsl_vector_set( f, i, (Yi - set[i].getIntensity())/sigma );
 
113
        }
 
114
 
 
115
        return GSL_SUCCESS;
 
116
    }
 
117
 
 
118
    Int LmaIsotopeFitter1D::jacobian_(const gsl_vector* x, void* params, gsl_matrix* J)
 
119
    {
 
120
        Size n = static_cast<LmaIsotopeFitter1D::Data*> (params) ->n;
 
121
        RawDataArrayType set = static_cast<LmaIsotopeFitter1D::Data*> (params) ->set;
 
122
        ContainerType isotopes_exact = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_exact;
 
123
        CoordinateType isotope_distance = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotope_distance;
 
124
        CoordinateType sigma = static_cast<LmaIsotopeFitter1D::Data*> (params) ->sigma;
 
125
        //CoordinateType stdev = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_stdev;
 
126
 
 
127
        CoordinateType A = gsl_vector_get( x, 0 );
 
128
        CoordinateType stdev = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_stdev; //gsl_vector_get( x, 1 );
 
129
        CoordinateType mono_mz = gsl_vector_get( x, 1 );
 
130
 
 
131
        // iterate over all points of the signal
 
132
        for ( Size i = 0; i < n; ++i )
 
133
        {
 
134
            CoordinateType m = set[i].getPos();
 
135
 
 
136
            CoordinateType term1 = sqrt(2*Constants::PI)*stdev;
 
137
            CoordinateType termSum1 = 0.0;
 
138
            CoordinateType termSum2 = 0.0;
 
139
          //  CoordinateType termSum3 = 0.0;
 
140
            CoordinateType termExp = 0.0;
 
141
 
 
142
            for (Size j=0; j<isotopes_exact.size(); ++j)
 
143
            {
 
144
              termExp = exp(-pow(m-mono_mz-j*isotope_distance,2)/(2*stdev*stdev));
 
145
              termSum1 += isotopes_exact[j] * termExp;
 
146
              termSum2 += isotopes_exact[j] * termExp * ((m-mono_mz-j*isotope_distance)/(stdev*stdev));
 
147
           //   termSum3 += isotopes_exact[j] * termExp * (-1/stdev + (pow(m-mono_mz-j*isotope_distance,2)/(stdev*stdev*stdev)));
 
148
            }
 
149
 
 
150
                CoordinateType f_a = (1/term1 * termSum1);
 
151
         //     CoordinateType f_stdev = (A/term1 * termSum3);
 
152
                CoordinateType f_mono_mz = (A/term1 * termSum2);
 
153
 
 
154
            // set the jacobian matrix
 
155
            gsl_matrix_set( J, i, 0, f_a/sigma );
 
156
         //   gsl_matrix_set( J, i, 1, f_stdev );
 
157
                gsl_matrix_set( J, i, 1, f_mono_mz/sigma );
 
158
        }
 
159
 
 
160
        return GSL_SUCCESS;
 
161
    }
 
162
 
 
163
    Int LmaIsotopeFitter1D::evaluate_(const gsl_vector* x, void* params, gsl_vector* f, gsl_matrix* J)
 
164
    {
 
165
        LmaIsotopeFitter1D::residual_( x, params, f );
 
166
        LmaIsotopeFitter1D::jacobian_( x, params, J );
 
167
 
 
168
        return GSL_SUCCESS;
 
169
    }
 
170
 
 
171
    void LmaIsotopeFitter1D::printState_(Int iter, gsl_multifit_fdfsolver * s)
 
172
    {
 
173
        printf ( "iter: %4u x = % 15.8f % 15.8f |f(x)| = %g\n", iter,
 
174
                 gsl_vector_get( s->x, 0 ),
 
175
                 gsl_vector_get( s->x, 1 ),
 
176
                 //gsl_vector_get( s->x, 2 ),
 
177
                 gsl_blas_dnrm2( s->f ) );
 
178
    }
 
179
 
 
180
    void LmaIsotopeFitter1D::setInitialParameters_()
 
181
    {
 
182
        // compute the relative abundance of i-th isotopic peak
 
183
        isotopes_exact_.clear();
 
184
        typedef std::vector < DoubleReal > ContainerType;
 
185
        CoordinateType mass = mean_ * charge_;
 
186
 
 
187
        Int C_num = Int( 0.5 + mass * averagine_[C]);
 
188
        Int N_num = Int( 0.5 + mass * averagine_[N]);
 
189
        Int O_num = Int( 0.5 + mass * averagine_[O]);
 
190
        Int H_num = Int( 0.5 + mass * averagine_[H]);
 
191
        Int S_num = Int( 0.5 + mass * averagine_[S]);
 
192
 
 
193
        String form("");
 
194
        if (C_num) form.append("C").append(String(C_num));
 
195
        if (H_num) form.append("H").append(String(H_num));
 
196
        if (N_num) form.append("N").append(String(N_num));
 
197
        if (O_num) form.append("O").append(String(O_num));
 
198
        if (S_num) form.append("S").append(String(S_num));
 
199
 
 
200
        EmpiricalFormula formula(form);
 
201
        typedef IsotopeDistribution::iterator IsoIter;
 
202
        IsotopeDistribution isotope_distribution = formula.getIsotopeDistribution(max_isotope_);
 
203
        isotope_distribution.trimRight(trim_right_cutoff_);
 
204
        isotope_distribution.renormalize();
 
205
 
 
206
        // compute relative abundance of i-th isotopic peak of a peptide
 
207
        CoordinateType isotopes_mean = 0;
 
208
        Int i=0;
 
209
        for (   IsoIter iter = isotope_distribution.begin(); iter != isotope_distribution.end(); ++iter, ++i)
 
210
        {
 
211
          isotopes_exact_.push_back(iter->second);
 
212
          isotopes_mean += iter->second*i;
 
213
        }
 
214
        isotopes_mean *= isotope_distance_ / charge_;
 
215
 
 
216
        // compute monoisotopic mass
 
217
        if (monoisotopic_mz_ == 0.0)
 
218
        {
 
219
          monoisotopic_mz_ = mean_-isotopes_mean;
 
220
        }
 
221
    }
 
222
 
 
223
    LmaIsotopeFitter1D::QualityType LmaIsotopeFitter1D::fit1d(const RawDataArrayType& set, InterpolationModel*& model)
 
224
    {
 
225
        // Calculate bounding box
 
226
        min_ = max_ = set[0].getPos();
 
227
        for ( Size pos=1; pos < set.size(); ++pos)
 
228
        {
 
229
            CoordinateType tmp = set[pos].getPos();
 
230
            if ( min_ > tmp ) min_ = tmp;
 
231
            if ( max_ < tmp ) max_ = tmp;
 
232
        }
 
233
 
 
234
        if (monoisotopic_mz_ != 0.0) monoisotopic_mass_known_ = true;
 
235
        else monoisotopic_mass_known_ = false;
 
236
 
 
237
        // Enlarge the bounding box by a few multiples of the standard deviation
 
238
        {
 
239
          stdev1_ = sqrt ( statistics_.variance() ) * tolerance_stdev_box_;
 
240
          min_ -= stdev1_;
 
241
          max_ += stdev1_;
 
242
        }
 
243
 
 
244
        // Compute start parameters
 
245
        setInitialParameters_();
 
246
 
 
247
        // Set advanced parameters for residual_  und jacobian_ method
 
248
        LmaIsotopeFitter1D::Data d;
 
249
        d.n = set.size();
 
250
        d.set = set;
 
251
        d.isotopes_exact = isotopes_exact_;
 
252
        d.isotope_distance = isotope_distance_;
 
253
        // d.mono_known = monoisotopic_mass_known_;
 
254
        // d.monoisotopic_mz = monoisotopic_mz_;
 
255
        d.isotopes_stdev = isotope_stdev_;
 
256
        d.sigma = 0.1; // gaussian noise (standard deviation = 0.1)
 
257
 
 
258
        // The various _f, _df and _fdf of the LM loop return GSL_EDOM if any parameter is < 0
 
259
 
 
260
          // Optimize parameters with Levenberg-Marquardt algorithm (GLS)
 
261
        CoordinateType x_init[ 2 ] = { total_intensity_/100 , /*isotope_stdev_,*/ monoisotopic_mz_ };
 
262
        optimize_(set, 2, x_init, &(residual_), &(jacobian_), &(evaluate_), &d);
 
263
 
 
264
        // Set optimized parameters
 
265
        total_intensity_ = x_init[0];
 
266
        // isotope_stdev_ = x_init[1];
 
267
        monoisotopic_mz_ = x_init[1];
 
268
 
 
269
#ifdef DEBUG_FEATUREFINDER
 
270
        if ( getGslStatus_() != "success" )
 
271
        {
 
272
          std::cout << "status: " << getGslStatus_() << std::endl;
 
273
        }
 
274
#endif
 
275
        // build model
 
276
        if (charge_==0)
 
277
        {
 
278
          model = static_cast<InterpolationModel*> (Factory<BaseModel<1> >::create("GaussModel"));
 
279
          model->setInterpolationStep( interpolation_step_ );
 
280
 
 
281
          Param tmp;
 
282
          tmp.setValue( "bounding_box:min", min_ );
 
283
          tmp.setValue( "bounding_box:max", max_ );
 
284
          tmp.setValue( "statistics:variance", statistics_.variance() );
 
285
          tmp.setValue( "statistics:mean", statistics_.mean() );
 
286
          model->setParameters( tmp );
 
287
        }
 
288
        else
 
289
        {
 
290
          model = static_cast<InterpolationModel*> (Factory<BaseModel<1> >::create("LmaIsotopeModel"));
 
291
 
 
292
          Param iso_param = this->param_.copy( "isotope_model:", true );
 
293
          iso_param.removeAll("stdev");
 
294
          model->setParameters( iso_param );
 
295
          model->setInterpolationStep( interpolation_step_ );
 
296
 
 
297
          Param tmp;
 
298
          tmp.setValue( "total_intensity", total_intensity_ );
 
299
          tmp.setValue( "monoisotopic_mz", monoisotopic_mz_ );
 
300
          tmp.setValue( "bounding_box:min", min_ );
 
301
          tmp.setValue( "bounding_box:max", max_ );
 
302
          tmp.setValue( "statistics:mean", statistics_.mean() );
 
303
          tmp.setValue( "charge", static_cast<Int>( charge_ ) );
 
304
          tmp.setValue( "isotope:stdev", isotope_stdev_ );
 
305
                      tmp.setValue( "isotope:maximum", max_isotope_ );
 
306
 
 
307
          model->setParameters( tmp );
 
308
        }
 
309
 
 
310
                        // calculate pearson correlation
 
311
                std::vector<Real> real_data;
 
312
        real_data.reserve(set.size());
 
313
        std::vector<Real> model_data;
 
314
        model_data.reserve(set.size());
 
315
 
 
316
        for (Size i=0; i < set.size(); ++i)
 
317
        {
 
318
           real_data.push_back(set[i].getIntensity());
 
319
           model_data.push_back( model->getIntensity( DPosition<1>(set[i].getPosition()) ) );
 
320
        }
 
321
 
 
322
        QualityType correlation = Math::pearsonCorrelationCoefficient(real_data.begin(), real_data.end(), model_data.begin(), model_data.end());
 
323
        if (boost::math::isnan(correlation)) correlation = -1.0;
 
324
 
 
325
        return correlation;
 
326
    }
 
327
 
 
328
    void LmaIsotopeFitter1D::updateMembers_()
 
329
    {
 
330
      LevMarqFitter1D::updateMembers_();
 
331
 
 
332
      total_intensity_ = param_.getValue("total_intensity");
 
333
      monoisotopic_mz_ = param_.getValue("monoisotopic_mass");
 
334
 
 
335
      statistics_.setVariance(param_.getValue("statistics:variance"));
 
336
      charge_ = param_.getValue("charge");
 
337
      isotope_stdev_ = param_.getValue("isotope:stdev");
 
338
 
 
339
      charge_ = param_.getValue("charge");
 
340
      mean_ = param_.getValue("statistics:mean");
 
341
      max_isotope_ = param_.getValue("isotope:maximum");
 
342
 
 
343
      trim_right_cutoff_ = param_.getValue("isotope:trim_right_cutoff");
 
344
      isotope_distance_ = param_.getValue("isotope:distance");
 
345
 
 
346
      averagine_[C] = param_.getValue("averagines:C");
 
347
      averagine_[H] = param_.getValue("averagines:H");
 
348
      averagine_[N] = param_.getValue("averagines:N");
 
349
      averagine_[O] = param_.getValue("averagines:O");
 
350
      averagine_[S] = param_.getValue("averagines:S");
 
351
    }
 
352
 
 
353
}