1
// -*- mode: C++; tab-width: 2; -*-
4
// --------------------------------------------------------------------------
5
// OpenMS Mass Spectrometry Framework
6
// --------------------------------------------------------------------------
7
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
9
// This library is free software; you can redistribute it and/or
10
// modify it under the terms of the GNU Lesser General Public
11
// License as published by the Free Software Foundation; either
12
// version 2.1 of the License, or (at your option) any later version.
14
// This library is distributed in the hope that it will be useful,
15
// but WITHOUT ANY WARRANTY; without even the implied warranty of
16
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
// Lesser General Public License for more details.
19
// You should have received a copy of the GNU Lesser General Public
20
// License along with this library; if not, write to the Free Software
21
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
// --------------------------------------------------------------------------
24
// $Maintainer: Stephan Aiche $
25
// $Authors: Stephan Aiche $
26
// --------------------------------------------------------------------------
28
#ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_EGHTRACEFITTER_H
29
#define OPENMS_TRANSFORMATIONS_FEATUREFINDER_EGHTRACEFITTER_H
32
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithmPickedHelperStructs.h>
33
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/TraceFitter.h>
39
* @brief A RT Profile fitter using an Exponential Gaussian Hybrid background model
41
* Lan K, Jorgenson JW.
42
* <b>A hybrid of exponential and gaussian functions as a simple model of asymmetric chromatographic peaks.</b>
43
* <em>Journal of Chromatography A.</em> 915 (1-2)p. 1-13.
44
* Available at: http://linkinghub.elsevier.com/retrieve/pii/S0021967301005945
46
* @htmlinclude OpenMS_EGHTraceFitter.parameters
48
* @experimental Needs further testing on real data. Note that the tests are currently also focused on testing the EGH as replacement for the gaussian.
50
template<class PeakType>
52
: public TraceFitter<PeakType>
57
//setName("EGHTraceFitter");
60
EGHTraceFitter(const EGHTraceFitter& other)
61
: TraceFitter<PeakType>(other)
63
this->height_ = other.height_;
64
this->apex_rt_ = other.apex_rt_;
65
this->sigma_square_ = other.sigma_square_;
66
this->tau_ = other.tau_;
68
this->sigma_5_bound_ = other.sigma_5_bound_;
69
this->fwhm_bound_ = other.fwhm_bound_;
74
EGHTraceFitter& operator = (const EGHTraceFitter& source)
76
TraceFitter<PeakType>::operator = (source);
78
this->height_ = source.height_;
79
this->apex_rt_ = source.apex_rt_;
80
this->sigma_square_ = source.sigma_square_;
81
this->tau_ = source.tau_;
83
this->sigma_5_bound_ = source.sigma_5_bound_;
84
this->fwhm_bound_ = source.fwhm_bound_;
91
virtual ~EGHTraceFitter()
96
// override important methods
97
void fit(FeatureFinderAlgorithmPickedHelperStructs::MassTraces<PeakType> & traces)
99
setInitialParameters_(traces);
101
double x_init[NUM_PARAMS_] = {height_, apex_rt_, sigma_square_, tau_};
103
Size num_params = NUM_PARAMS_;
105
TraceFitter<PeakType>::optimize_(traces, num_params , x_init ,
106
&( EGHTraceFitter<PeakType>::residual_ ) ,
107
&( EGHTraceFitter<PeakType>::jacobian_ ) ,
108
&( EGHTraceFitter<PeakType>::evaluate_ ));
111
DoubleReal getLowerRTBound() const
113
return sigma_5_bound_.first;
116
DoubleReal getTau() const
121
DoubleReal getUpperRTBound() const
123
return sigma_5_bound_.second;
126
DoubleReal getHeight() const
131
DoubleReal getSigmaSquare() const
133
return sigma_square_;
136
DoubleReal getCenter() const
141
bool checkMaximalRTSpan(const DoubleReal max_rt_span)
143
return ( (sigma_5_bound_.second - sigma_5_bound_.first) > max_rt_span*region_rt_span_);
146
virtual bool checkMinimalRTSpan(const std::pair<DoubleReal,DoubleReal> & rt_bounds, const DoubleReal min_rt_span)
148
return (rt_bounds.second-rt_bounds.first) < min_rt_span * (sigma_5_bound_.second - sigma_5_bound_.first);
151
DoubleReal computeTheoretical(const FeatureFinderAlgorithmPickedHelperStructs::MassTrace<PeakType> & trace, Size k)
153
double rt = trace.peaks[k].first;
154
double t_diff,t_diff2,denominator = 0.0;
157
t_diff = rt - apex_rt_;
158
t_diff2 = t_diff * t_diff; // -> (t - t_R)^2
160
denominator = 2 * sigma_square_ + tau_ * t_diff; // -> 2\sigma_{g}^{2} + \tau \left(t - t_R\right)
162
if(denominator > 0.0)
164
fegh = trace.theoretical_int * height_ * exp(- t_diff2 / denominator);
170
virtual DoubleReal getFeatureIntensityContribution()
172
return height_ * (fwhm_bound_.second - fwhm_bound_.first);
175
DoubleReal getFWHM() const
178
std::pair<DoubleReal, DoubleReal> bounds = getAlphaBoundaries_(0.5);
179
return bounds.second - bounds.first;
182
virtual String getGnuplotFormula(FeatureFinderAlgorithmPickedHelperStructs::MassTrace<PeakType> const & trace, const char function_name, const DoubleReal baseline, const DoubleReal rt_shift)
185
s << String(function_name) << "(x)= " << baseline << " + ";
186
s << "("; // the overall bracket
187
s << "((" << 2 * sigma_square_ << " + " << tau_ << " * (x - " << (rt_shift + apex_rt_) << " )) > 0) ? "; // condition
188
s << (trace.theoretical_int * height_) << " * exp(-1 * (x - " << (rt_shift + apex_rt_) << ")**2 " <<
190
" ( " << 2 * sigma_square_ << " + " << tau_ << " * (x - " << (rt_shift + apex_rt_) << " )))";
192
return String(s.str());
199
DoubleReal sigma_square_;
202
std::pair<DoubleReal, DoubleReal> sigma_5_bound_;
203
std::pair<DoubleReal, DoubleReal> fwhm_bound_;
205
DoubleReal region_rt_span_;
207
static const Size NUM_PARAMS_ = 4;
210
* @brief Return an ordered pair of the positions where the EGH reaches a height of alpha * height of the EGH
212
* @param alpha The alpha at which the boundaries should be computed
214
std::pair<DoubleReal, DoubleReal> getAlphaBoundaries_(const DoubleReal alpha) const
216
std::pair<DoubleReal, DoubleReal> bounds;
217
DoubleReal L = log(alpha);
219
(pow(L*tau_, 2) / 4) - 2*L*sigma_square_
223
s1 = (-1 * (L * tau_) / 2) + s;
224
s2 = (-1 * (L * tau_) / 2) - s;
226
// the smaller one (should be < 0) = lower bound
227
bounds.first = apex_rt_ + std::min(s1,s2);
228
// bigger one (should be > 0) = upper bound
229
bounds.second = apex_rt_ + std::max(s1,s2);
234
void getOptimizedParameters_(gsl_multifit_fdfsolver * fdfsolver)
236
height_ = gsl_vector_get(fdfsolver->x, 0);
237
apex_rt_ = gsl_vector_get(fdfsolver->x, 1);
238
sigma_square_ = gsl_vector_get(fdfsolver->x, 2);
239
tau_ = gsl_vector_get(fdfsolver->x, 3);
241
// we set alpha to 0.04 which is conceptually equal to
242
// 2.5 sigma for lower and upper bound
243
sigma_5_bound_ = getAlphaBoundaries_(0.043937);
244
// this is needed for the intensity contribution -> this is the 1.25 sigma region
245
fwhm_bound_ = getAlphaBoundaries_(0.45783);
248
static Int residual_(const gsl_vector* param, void* data, gsl_vector* f)
250
FeatureFinderAlgorithmPickedHelperStructs::MassTraces<PeakType>* traces = static_cast<FeatureFinderAlgorithmPickedHelperStructs::MassTraces<PeakType>*>(data);
252
double H = gsl_vector_get( param, 0 );
253
double tR = gsl_vector_get( param, 1 );
254
double sigma_square = gsl_vector_get( param, 2 );
255
double tau = gsl_vector_get( param, 3 );
257
double t_diff,t_diff2,denominator = 0.0;
262
for (Size t = 0 ; t < traces->size() ; ++t)
264
FeatureFinderAlgorithmPickedHelperStructs::MassTrace<PeakType>& trace = traces->at(t);
265
for (Size i = 0 ; i < trace.peaks.size() ; ++i)
267
DoubleReal rt = trace.peaks[i].first;
270
t_diff2 = t_diff * t_diff; // -> (t - t_R)^2
272
denominator = 2 * sigma_square + tau * t_diff; // -> 2\sigma_{g}^{2} + \tau \left(t - t_R\right)
274
if(denominator > 0.0)
276
fegh = traces->baseline + trace.theoretical_int * H * exp(- t_diff2 / denominator);
283
gsl_vector_set( f, count, ( fegh - trace.peaks[i].second->getIntensity() ) );
290
static Int jacobian_(const gsl_vector* param, void* data, gsl_matrix* J)
292
FeatureFinderAlgorithmPickedHelperStructs::MassTraces<PeakType>* traces = static_cast<FeatureFinderAlgorithmPickedHelperStructs::MassTraces<PeakType>*>(data);
294
double H = gsl_vector_get( param, 0 );
295
double tR = gsl_vector_get( param, 1 );
296
double sigma_square = gsl_vector_get( param, 2 );
297
double tau = gsl_vector_get( param, 3 );
299
double derivative_H, derivative_tR, derivative_sigma_square, derivative_tau = 0.0;
300
double t_diff,t_diff2,exp1,denominator = 0.0;
303
for (Size t = 0; t < traces->size() ; ++t)
305
FeatureFinderAlgorithmPickedHelperStructs::MassTrace<PeakType>& trace = traces->at(t);
306
for (Size i = 0; i < trace.peaks.size() ; ++i)
308
DoubleReal rt = trace.peaks[i].first;
311
t_diff2 = t_diff * t_diff; // -> (t - t_R)^2
313
denominator = 2 * sigma_square + tau * t_diff; // -> 2\sigma_{g}^{2} + \tau \left(t - t_R\right)
317
exp1 = exp(- t_diff2 / denominator);
319
// \partial H f_{egh}(t) = \exp\left( \frac{-\left(t-t_R \right)}{2\sigma_{g}^{2} + \tau \left(t - t_R\right)} \right)
320
derivative_H = trace.theoretical_int * exp1;
322
// \partial t_R f_{egh}(t) &=& H \exp \left( \frac{-\left(t-t_R \right)}{2\sigma_{g}^{2} + \tau \left(t - t_R\right)} \right) \left( \frac{\left( 4 \sigma_{g}^{2} + \tau \left(t-t_R \right) \right) \left(t-t_R \right)}{\left( 2\sigma_{g}^{2} + \tau \left(t - t_R\right) \right)^2} \right)
323
derivative_tR = trace.theoretical_int * H * exp1 * ( ((4 * sigma_square + tau * t_diff) * t_diff) / (denominator * denominator));
325
// \partial \sigma_{g}^{2} f_{egh}(t) &=& H \exp \left( \frac{-\left(t-t_R \right)^2}{2\sigma_{g}^{2} + \tau \left(t - t_R\right)} \right) \left( \frac{ 2 \left(t - t_R\right)^2}{\left( 2\sigma_{g}^{2} + \tau \left(t - t_R\right) \right)^2} \right)
326
derivative_sigma_square = trace.theoretical_int * H * exp1 * ((2 * t_diff2) / (denominator * denominator));
328
// \partial \tau f_{egh}(t) &=& H \exp \left( \frac{-\left(t-t_R \right)^2}{2\sigma_{g}^{2} + \tau \left(t - t_R\right)} \right) \left( \frac{ \left(t - t_R\right)^3}{\left( 2\sigma_{g}^{2} + \tau \left(t - t_R\right) \right)^2} \right)
329
derivative_tau = trace.theoretical_int * H * exp1 * ((t_diff * t_diff2) / (denominator * denominator));
335
derivative_sigma_square = 0.0;
336
derivative_tau = 0.0;
339
// set the jacobian matrix
340
gsl_matrix_set( J, count, 0, derivative_H );
341
gsl_matrix_set( J, count, 1, derivative_tR );
342
gsl_matrix_set( J, count, 2, derivative_sigma_square );
343
gsl_matrix_set( J, count, 3, derivative_tau );
351
static Int evaluate_(const gsl_vector* param, void* data, gsl_vector* f, gsl_matrix* J)
353
residual_(param, data, f);
354
jacobian_(param, data, J);
358
void setInitialParameters_(FeatureFinderAlgorithmPickedHelperStructs::MassTraces<PeakType> & traces)
360
LOG_DEBUG << "EGHTraceFitter->setInitialParameters(..)" << std::endl;
361
LOG_DEBUG << "Traces length: " << traces.size() << std::endl;
362
LOG_DEBUG << "Max trace: " << traces.max_trace << std::endl;
364
// initial values for externals
365
height_ = traces[traces.max_trace].max_peak->getIntensity() - traces.baseline;
366
LOG_DEBUG << "height: " << height_ << std::endl;
367
apex_rt_ = traces[traces.max_trace].max_rt;
368
LOG_DEBUG << "apex_rt: " << apex_rt_ << std::endl;
369
region_rt_span_ = traces[traces.max_trace].peaks.back().first-traces[traces.max_trace].peaks[0].first;
370
LOG_DEBUG << "region_rt_span_: " << region_rt_span_ << std::endl;
372
const PeakType * max_peak = traces[traces.max_trace].peaks.begin()->second;
375
for (Size i = 1; i < traces[traces.max_trace].peaks.size() ; ++i)
377
if (traces[traces.max_trace].peaks[i].second->getIntensity()>max_peak->getIntensity())
379
max_peak = traces[traces.max_trace].peaks[i].second;
385
LOG_DEBUG << "max_pos: " << max_pos << std::endl;
386
if (traces[traces.max_trace].peaks.size() < 3)
388
// TODO: abort the whole thing here??
389
// because below we REQUIRE at least three peaks!!!
392
Size filter_max_pos = traces[traces.max_trace].peaks.size() - 2;
394
// compute a smoothed value for the maxima
395
// if the maximum is close to the borders, we need to think of something...
396
DoubleReal smoothed_height;
397
if ((max_pos < 2) || (max_pos+2 >= traces[traces.max_trace].peaks.size()))
399
// ... too close to border... no smoothing
400
smoothed_height = traces[traces.max_trace].peaks[max_pos].second->getIntensity();
401
// TODO: does this trace even make sense?! why wasn't it extended it further? or should we have skipped it beforehand?
405
smoothed_height = (traces[traces.max_trace].peaks[max_pos - 2].second->getIntensity()
406
+ traces[traces.max_trace].peaks[max_pos - 1].second->getIntensity()
407
+ traces[traces.max_trace].peaks[max_pos].second->getIntensity()
408
+ traces[traces.max_trace].peaks[max_pos + 1].second->getIntensity()
409
+ traces[traces.max_trace].peaks[max_pos + 2].second->getIntensity() ) / 5.0;
412
// use moving average filter to avoid bad initial values
413
// moving average of size 5
414
// TODO: optimize windows size
415
while(i > 2 && i < filter_max_pos)
418
DoubleReal smoothed = (traces[traces.max_trace].peaks[i - 2].second->getIntensity()
419
+ traces[traces.max_trace].peaks[i - 1].second->getIntensity()
420
+ traces[traces.max_trace].peaks[i].second->getIntensity()
421
+ traces[traces.max_trace].peaks[i + 1].second->getIntensity()
422
+ traces[traces.max_trace].peaks[i + 2].second->getIntensity() ) / 5.0;
424
if(smoothed / smoothed_height < 0.5) break;
427
LOG_DEBUG << "Left alpha at " << i << " with " << traces[traces.max_trace].peaks[i].first << std::endl;
428
double A = apex_rt_ - traces[traces.max_trace].peaks[i].first;
431
while(i < filter_max_pos && i > 2)
433
DoubleReal smoothed = (traces[traces.max_trace].peaks[i - 2].second->getIntensity()
434
+ traces[traces.max_trace].peaks[i - 1].second->getIntensity()
435
+ traces[traces.max_trace].peaks[i].second->getIntensity()
436
+ traces[traces.max_trace].peaks[i + 1].second->getIntensity()
437
+ traces[traces.max_trace].peaks[i + 2].second->getIntensity() ) / 5.0;
439
if(smoothed / smoothed_height < 0.5) break;
442
LOG_DEBUG << "Right alpha at " << i << " with " << traces[traces.max_trace].peaks[i].first << std::endl;
443
double B = traces[traces.max_trace].peaks[i].first - apex_rt_;
445
//LOG_DEBUG << "A: " << A << std::endl;
446
//LOG_DEBUG << "B: " << B << std::endl;
448
// compute estimates for tau / sigma_square based on A/B
449
double log_alpha = log(0.5);
451
tau_ = ( -1 / log_alpha ) * (B - A);
452
LOG_DEBUG << "tau: " << tau_ << std::endl;
453
sigma_square_ = ( -1 / (2 * log_alpha) ) * (B * A);
454
LOG_DEBUG << "sigma_square: " << sigma_square_ << std::endl;
457
virtual void updateMembers_()
459
TraceFitter<PeakType>::updateMembers_();
462
void printState_(SignedSize iter, gsl_multifit_fdfsolver * s )
464
LOG_DEBUG << "iter: " << iter << " "
465
<< "height: " << gsl_vector_get( s->x, 0 ) << " "
466
<< "apex_rt: " << gsl_vector_get( s->x, 1 ) << " "
467
<< "sigma_square: " << gsl_vector_get( s->x, 2 ) << " "
468
<< "tau: " << gsl_vector_get( s->x, 3 ) << " "
469
<< "|f(x)| = " << gsl_blas_dnrm2( s->f ) << std::endl;
474
} // namespace OpenMS
476
#endif // #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERALGORITHMPICKEDTRACEFITTERGAUSS_H