1
// -*- mode: C++; tab-width: 2; -*-
4
// --------------------------------------------------------------------------
5
// OpenMS Mass Spectrometry Framework
6
// --------------------------------------------------------------------------
7
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
9
// This library is free software; you can redistribute it and/or
10
// modify it under the terms of the GNU Lesser General Public
11
// License as published by the Free Software Foundation; either
12
// version 2.1 of the License, or (at your option) any later version.
14
// This library is distributed in the hope that it will be useful,
15
// but WITHOUT ANY WARRANTY; without even the implied warranty of
16
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
// Lesser General Public License for more details.
19
// You should have received a copy of the GNU Lesser General Public
20
// License along with this library; if not, write to the Free Software
21
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
// --------------------------------------------------------------------------
24
// $Maintainer: Clemens Groepl $
26
// --------------------------------------------------------------------------
28
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/LmaIsotopeFitter1D.h>
29
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/InterpolationModel.h>
30
#include <OpenMS/MATH/STATISTICS/StatisticFunctions.h>
31
#include <OpenMS/CONCEPT/Constants.h>
34
#include <boost/math/special_functions/fpclassify.hpp>
39
LmaIsotopeFitter1D::LmaIsotopeFitter1D()
42
setName(getProductName());
43
defaults_.setValue("averagines:C",0.04443989f,"Number of C atoms per Dalton of mass.", StringList::create("advanced"));
44
defaults_.setValue("averagines:H",0.06981572f,"Number of H atoms per Dalton of mass.", StringList::create("advanced"));
45
defaults_.setValue("averagines:N",0.01221773f,"Number of N atoms per Dalton of mass.", StringList::create("advanced"));
46
defaults_.setValue("averagines:O",0.01329399f,"Number of O atoms per Dalton of mass.", StringList::create("advanced"));
47
defaults_.setValue("averagines:S",0.00037525f,"Number of S atoms per Dalton of mass.", StringList::create("advanced"));
49
defaults_.setValue("isotope:trim_right_cutoff",0.001,"Cutoff in averagine distribution, trailing isotopes below this relative intensity are not considered.", StringList::create("advanced"));
50
defaults_.setValue("isotope:maximum",100,"Maximum isotopic rank to be considered.", StringList::create("advanced"));
51
defaults_.setValue("isotope:distance",1.000495,"Distance between consecutive isotopic peaks.", StringList::create("advanced"));
52
defaults_.setValue("isotope:stdev",0.1,"Standard deviation of gaussian applied to the averagine isotopic pattern to simulate the inaccuracy of the mass spectrometer.", StringList::create("advanced"));
53
defaults_.setValue("charge",1,"Charge state of the model.", StringList::create("advanced"));
54
defaults_.setValue("statistics:mean",0.0,"Centroid m/z (as opposed to monoisotopic m/z).", StringList::create("advanced"));
55
defaults_.setValue("statistics:variance",1.0,"Variance of the model.", StringList::create("advanced"));
56
defaults_.setValue("interpolation_step",0.1,"Sampling rate for the interpolation of the model function.", StringList::create("advanced"));
57
defaults_.setValue("total_intensity",100.0,"Total intensity under the curve in mz dimension.", StringList::create("advanced"));
58
defaults_.setValue("monoisotopic_mass",0.0,"Monoisotopic mz of the model.", StringList::create("advanced"));
63
LmaIsotopeFitter1D::LmaIsotopeFitter1D(const LmaIsotopeFitter1D& source)
64
: LevMarqFitter1D(source)
69
LmaIsotopeFitter1D::~LmaIsotopeFitter1D()
73
LmaIsotopeFitter1D& LmaIsotopeFitter1D::operator = (const LmaIsotopeFitter1D& source)
75
if (&source == this) return *this;
77
LevMarqFitter1D::operator = (source);
83
Int LmaIsotopeFitter1D::residual_(const gsl_vector* x, void* params, gsl_vector* f)
85
Size n = static_cast<LmaIsotopeFitter1D::Data*> (params) ->n;
86
RawDataArrayType set = static_cast<LmaIsotopeFitter1D::Data*> (params) ->set;
87
ContainerType isotopes_exact = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_exact;
88
CoordinateType isotope_distance = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotope_distance;
89
CoordinateType sigma = static_cast<LmaIsotopeFitter1D::Data*> (params) ->sigma;
90
//CoordinateType stdev = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_stdev;
92
CoordinateType A = gsl_vector_get( x, 0 );
93
CoordinateType stdev = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_stdev; //gsl_vector_get( x, 1 );
94
CoordinateType mono_mz = gsl_vector_get( x, 1 );
96
CoordinateType Yi = 0.0;
98
// iterate over all points of the signal
99
for ( Size i = 0; i < n; ++i )
101
CoordinateType m = set[i].getPos();
103
CoordinateType term1 = A/(sqrt(2*Constants::PI)*stdev);
104
CoordinateType termSum = 0;
105
for (Size j=0; j<isotopes_exact.size(); ++j)
107
termSum += isotopes_exact[j]*exp(-pow(m-mono_mz-j*isotope_distance,2)/(2*stdev*stdev)) ;
112
gsl_vector_set( f, i, (Yi - set[i].getIntensity())/sigma );
118
Int LmaIsotopeFitter1D::jacobian_(const gsl_vector* x, void* params, gsl_matrix* J)
120
Size n = static_cast<LmaIsotopeFitter1D::Data*> (params) ->n;
121
RawDataArrayType set = static_cast<LmaIsotopeFitter1D::Data*> (params) ->set;
122
ContainerType isotopes_exact = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_exact;
123
CoordinateType isotope_distance = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotope_distance;
124
CoordinateType sigma = static_cast<LmaIsotopeFitter1D::Data*> (params) ->sigma;
125
//CoordinateType stdev = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_stdev;
127
CoordinateType A = gsl_vector_get( x, 0 );
128
CoordinateType stdev = static_cast<LmaIsotopeFitter1D::Data*> (params) ->isotopes_stdev; //gsl_vector_get( x, 1 );
129
CoordinateType mono_mz = gsl_vector_get( x, 1 );
131
// iterate over all points of the signal
132
for ( Size i = 0; i < n; ++i )
134
CoordinateType m = set[i].getPos();
136
CoordinateType term1 = sqrt(2*Constants::PI)*stdev;
137
CoordinateType termSum1 = 0.0;
138
CoordinateType termSum2 = 0.0;
139
// CoordinateType termSum3 = 0.0;
140
CoordinateType termExp = 0.0;
142
for (Size j=0; j<isotopes_exact.size(); ++j)
144
termExp = exp(-pow(m-mono_mz-j*isotope_distance,2)/(2*stdev*stdev));
145
termSum1 += isotopes_exact[j] * termExp;
146
termSum2 += isotopes_exact[j] * termExp * ((m-mono_mz-j*isotope_distance)/(stdev*stdev));
147
// termSum3 += isotopes_exact[j] * termExp * (-1/stdev + (pow(m-mono_mz-j*isotope_distance,2)/(stdev*stdev*stdev)));
150
CoordinateType f_a = (1/term1 * termSum1);
151
// CoordinateType f_stdev = (A/term1 * termSum3);
152
CoordinateType f_mono_mz = (A/term1 * termSum2);
154
// set the jacobian matrix
155
gsl_matrix_set( J, i, 0, f_a/sigma );
156
// gsl_matrix_set( J, i, 1, f_stdev );
157
gsl_matrix_set( J, i, 1, f_mono_mz/sigma );
163
Int LmaIsotopeFitter1D::evaluate_(const gsl_vector* x, void* params, gsl_vector* f, gsl_matrix* J)
165
LmaIsotopeFitter1D::residual_( x, params, f );
166
LmaIsotopeFitter1D::jacobian_( x, params, J );
171
void LmaIsotopeFitter1D::printState_(Int iter, gsl_multifit_fdfsolver * s)
173
printf ( "iter: %4u x = % 15.8f % 15.8f |f(x)| = %g\n", iter,
174
gsl_vector_get( s->x, 0 ),
175
gsl_vector_get( s->x, 1 ),
176
//gsl_vector_get( s->x, 2 ),
177
gsl_blas_dnrm2( s->f ) );
180
void LmaIsotopeFitter1D::setInitialParameters_()
182
// compute the relative abundance of i-th isotopic peak
183
isotopes_exact_.clear();
184
typedef std::vector < DoubleReal > ContainerType;
185
CoordinateType mass = mean_ * charge_;
187
Int C_num = Int( 0.5 + mass * averagine_[C]);
188
Int N_num = Int( 0.5 + mass * averagine_[N]);
189
Int O_num = Int( 0.5 + mass * averagine_[O]);
190
Int H_num = Int( 0.5 + mass * averagine_[H]);
191
Int S_num = Int( 0.5 + mass * averagine_[S]);
194
if (C_num) form.append("C").append(String(C_num));
195
if (H_num) form.append("H").append(String(H_num));
196
if (N_num) form.append("N").append(String(N_num));
197
if (O_num) form.append("O").append(String(O_num));
198
if (S_num) form.append("S").append(String(S_num));
200
EmpiricalFormula formula(form);
201
typedef IsotopeDistribution::iterator IsoIter;
202
IsotopeDistribution isotope_distribution = formula.getIsotopeDistribution(max_isotope_);
203
isotope_distribution.trimRight(trim_right_cutoff_);
204
isotope_distribution.renormalize();
206
// compute relative abundance of i-th isotopic peak of a peptide
207
CoordinateType isotopes_mean = 0;
209
for ( IsoIter iter = isotope_distribution.begin(); iter != isotope_distribution.end(); ++iter, ++i)
211
isotopes_exact_.push_back(iter->second);
212
isotopes_mean += iter->second*i;
214
isotopes_mean *= isotope_distance_ / charge_;
216
// compute monoisotopic mass
217
if (monoisotopic_mz_ == 0.0)
219
monoisotopic_mz_ = mean_-isotopes_mean;
223
LmaIsotopeFitter1D::QualityType LmaIsotopeFitter1D::fit1d(const RawDataArrayType& set, InterpolationModel*& model)
225
// Calculate bounding box
226
min_ = max_ = set[0].getPos();
227
for ( Size pos=1; pos < set.size(); ++pos)
229
CoordinateType tmp = set[pos].getPos();
230
if ( min_ > tmp ) min_ = tmp;
231
if ( max_ < tmp ) max_ = tmp;
234
if (monoisotopic_mz_ != 0.0) monoisotopic_mass_known_ = true;
235
else monoisotopic_mass_known_ = false;
237
// Enlarge the bounding box by a few multiples of the standard deviation
239
stdev1_ = sqrt ( statistics_.variance() ) * tolerance_stdev_box_;
244
// Compute start parameters
245
setInitialParameters_();
247
// Set advanced parameters for residual_ und jacobian_ method
248
LmaIsotopeFitter1D::Data d;
251
d.isotopes_exact = isotopes_exact_;
252
d.isotope_distance = isotope_distance_;
253
// d.mono_known = monoisotopic_mass_known_;
254
// d.monoisotopic_mz = monoisotopic_mz_;
255
d.isotopes_stdev = isotope_stdev_;
256
d.sigma = 0.1; // gaussian noise (standard deviation = 0.1)
258
// The various _f, _df and _fdf of the LM loop return GSL_EDOM if any parameter is < 0
260
// Optimize parameters with Levenberg-Marquardt algorithm (GLS)
261
CoordinateType x_init[ 2 ] = { total_intensity_/100 , /*isotope_stdev_,*/ monoisotopic_mz_ };
262
optimize_(set, 2, x_init, &(residual_), &(jacobian_), &(evaluate_), &d);
264
// Set optimized parameters
265
total_intensity_ = x_init[0];
266
// isotope_stdev_ = x_init[1];
267
monoisotopic_mz_ = x_init[1];
269
#ifdef DEBUG_FEATUREFINDER
270
if ( getGslStatus_() != "success" )
272
std::cout << "status: " << getGslStatus_() << std::endl;
278
model = static_cast<InterpolationModel*> (Factory<BaseModel<1> >::create("GaussModel"));
279
model->setInterpolationStep( interpolation_step_ );
282
tmp.setValue( "bounding_box:min", min_ );
283
tmp.setValue( "bounding_box:max", max_ );
284
tmp.setValue( "statistics:variance", statistics_.variance() );
285
tmp.setValue( "statistics:mean", statistics_.mean() );
286
model->setParameters( tmp );
290
model = static_cast<InterpolationModel*> (Factory<BaseModel<1> >::create("LmaIsotopeModel"));
292
Param iso_param = this->param_.copy( "isotope_model:", true );
293
iso_param.removeAll("stdev");
294
model->setParameters( iso_param );
295
model->setInterpolationStep( interpolation_step_ );
298
tmp.setValue( "total_intensity", total_intensity_ );
299
tmp.setValue( "monoisotopic_mz", monoisotopic_mz_ );
300
tmp.setValue( "bounding_box:min", min_ );
301
tmp.setValue( "bounding_box:max", max_ );
302
tmp.setValue( "statistics:mean", statistics_.mean() );
303
tmp.setValue( "charge", static_cast<Int>( charge_ ) );
304
tmp.setValue( "isotope:stdev", isotope_stdev_ );
305
tmp.setValue( "isotope:maximum", max_isotope_ );
307
model->setParameters( tmp );
310
// calculate pearson correlation
311
std::vector<Real> real_data;
312
real_data.reserve(set.size());
313
std::vector<Real> model_data;
314
model_data.reserve(set.size());
316
for (Size i=0; i < set.size(); ++i)
318
real_data.push_back(set[i].getIntensity());
319
model_data.push_back( model->getIntensity( DPosition<1>(set[i].getPosition()) ) );
322
QualityType correlation = Math::pearsonCorrelationCoefficient(real_data.begin(), real_data.end(), model_data.begin(), model_data.end());
323
if (boost::math::isnan(correlation)) correlation = -1.0;
328
void LmaIsotopeFitter1D::updateMembers_()
330
LevMarqFitter1D::updateMembers_();
332
total_intensity_ = param_.getValue("total_intensity");
333
monoisotopic_mz_ = param_.getValue("monoisotopic_mass");
335
statistics_.setVariance(param_.getValue("statistics:variance"));
336
charge_ = param_.getValue("charge");
337
isotope_stdev_ = param_.getValue("isotope:stdev");
339
charge_ = param_.getValue("charge");
340
mean_ = param_.getValue("statistics:mean");
341
max_isotope_ = param_.getValue("isotope:maximum");
343
trim_right_cutoff_ = param_.getValue("isotope:trim_right_cutoff");
344
isotope_distance_ = param_.getValue("isotope:distance");
346
averagine_[C] = param_.getValue("averagines:C");
347
averagine_[H] = param_.getValue("averagines:H");
348
averagine_[N] = param_.getValue("averagines:N");
349
averagine_[O] = param_.getValue("averagines:O");
350
averagine_[S] = param_.getValue("averagines:S");